{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyNeIKbkvfK6yckn1IezADdY"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["If we use X = np.array([[-2.69305227, 0.55617734, -2.43874732, 2.07026181], [ 1.46140931, 2.56987791, 1.40453353, 1.36132903], [-0.7035427 , 0.92223751, 0.36613864, -1.59524025], [ 1.20800429, -0.4637739 , 1.1106433 , 0.85284503], [-2.23840524, -0.84209178, -0.04810615, 1.53559851], [ 3.44829863, 2.13786731, 0.9661306 , -3.02757453], [ 4.19588081, 0.95286342, 0.16051249, 0.59197391], [ 0.1426253 , -1.25490369, 0.0986404 , 0.6355671 ], [ 0.27103178, 0.52906105, -0.64250317, -1.29833464], [-0.08165368, -0.87659034, -3.02019504, 1.0802352 ], [ 0.52249028, -1.72924316, 1.21902947, 1.38806363], [-0.23223567, -1.22051892, 2.88914811, -0.29774035], [-1.04524743, -1.27354275, -1.0832457 , 0.05064772], [ 0.51773799, -1.61353239, 0.13621013, -2.08071959]]) and y = np.array([2, 3, 0, 4, 3, 0, 3, 0, 1, 1, 4, 1, 2, 2]) to train a decision tree (with random_state = 146), what are the predictions for X_test = np.array([[ 0.59730683, -1.47388234, 0.19323579, 1.00210836], [ 1.40572379, 1.44596069, -0.72924456, -1.5125544 ], [-2.38342167, -0.36546006, 0.77207181, 2.69580243], [ 1.26963 , -1.74360923, 0.26511791, -2.56548327], [-3.7681268 , 1.71391718, -3.59390654, 3.78457425]])?"],"metadata":{"id":"gHeXxPK7azET"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"LAH0iN-oasQ6","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1743050581432,"user_tz":240,"elapsed":157,"user":{"displayName":"Vaibhav Gurav","userId":"09440818105492169759"}},"outputId":"c6e761c1-aade-4112-cd48-b68822cbfb09"},"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([4, 0, 3, 2, 2])"]},"metadata":{},"execution_count":11}],"source":["import numpy as np\n","import pandas as pd\n","from sklearn.ensemble import RandomForestClassifier\n","\n","X = np.array([[-2.69305227, 0.55617734, -2.43874732, 2.07026181], [ 1.46140931, 2.56987791, 1.40453353, 1.36132903], [-0.7035427 , 0.92223751, 0.36613864, -1.59524025], [ 1.20800429, -0.4637739 , 1.1106433 , 0.85284503], [-2.23840524, -0.84209178, -0.04810615, 1.53559851], [ 3.44829863, 2.13786731, 0.9661306 , -3.02757453], [ 4.19588081, 0.95286342, 0.16051249, 0.59197391], [ 0.1426253 , -1.25490369, 0.0986404 , 0.6355671 ], [ 0.27103178, 0.52906105, -0.64250317, -1.29833464], [-0.08165368, -0.87659034, -3.02019504, 1.0802352 ], [ 0.52249028, -1.72924316, 1.21902947, 1.38806363], [-0.23223567, -1.22051892, 2.88914811, -0.29774035], [-1.04524743, -1.27354275, -1.0832457 , 0.05064772], [ 0.51773799, -1.61353239, 0.13621013, -2.08071959]])\n","\n","y = np.array([2, 3, 0, 4, 3, 0, 3, 0, 1, 1, 4, 1, 2, 2])\n","\n","X_test = np.array([[ 0.59730683, -1.47388234, 0.19323579, 1.00210836], [ 1.40572379, 1.44596069, -0.72924456, -1.5125544 ], [-2.38342167, -0.36546006, 0.77207181, 2.69580243], [ 1.26963 , -1.74360923, 0.26511791, -2.56548327], [-3.7681268 , 1.71391718, -3.59390654, 3.78457425]])\n","\n","rf = RandomForestClassifier(random_state=146)\n","\n","rf.fit(X, y)\n","\n","rf.predict(X_test)"]},{"cell_type":"code","source":["import warnings\n","warnings.filterwarnings(\"ignore\")\n","\n","import numpy as np\n","import pandas as pd\n","import io\n","import requests\n","\n","# dataset1 (Data1)\n","df_url = 
{"cell_type":"code","source":["import warnings\n","warnings.filterwarnings(\"ignore\")\n","\n","import numpy as np\n","import pandas as pd\n","import io\n","import requests\n","\n","# dataset1 (Data1)\n","df_url = 'https://raw.githubusercontent.com/akmand/datasets/master/openintro/bdims.csv'\n","url_content = requests.get(df_url, verify=False).content\n","data1 = pd.read_csv(io.StringIO(url_content.decode('utf-8')))\n","\n","# dataset2 (Data2)\n","df_url = 'https://raw.githubusercontent.com/akmand/datasets/main/baseball.csv'\n","url_content = requests.get(df_url, verify=False).content\n","data2 = pd.read_csv(io.StringIO(url_content.decode('utf-8')))\n","\n","# dataset3 (Data3)\n","df_url = 'https://raw.githubusercontent.com/akmand/datasets/main/cdc.csv'\n","url_content = requests.get(df_url, verify=False).content\n","data3 = pd.read_csv(io.StringIO(url_content.decode('utf-8')))"],"metadata":{"id":"xbEcSLxd_ZI1","executionInfo":{"status":"ok","timestamp":1743085493991,"user_tz":240,"elapsed":4288,"user":{"displayName":"Vaibhav Gurav","userId":"09440818105492169759"}}},"execution_count":1,"outputs":[]},
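{"cell_type":"markdown","source":["A brief optional check, added as an illustration rather than taken from the original notebook: it confirms that the three downloads parsed into DataFrames with plausible shapes, and only assumes the data1, data2, and data3 frames created above."],"metadata":{}},{"cell_type":"code","source":["# Confirm each DataFrame loaded and report its dimensions.\n","for name, df in [('data1 (bdims)', data1), ('data2 (baseball)', data2), ('data3 (cdc)', data3)]:\n","    print(name, df.shape)\n","\n","# Preview the first rows of one of them.\n","data1.head()"],"metadata":{},"execution_count":null,"outputs":[]},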
{"cell_type":"code","source":["from sklearn.ensemble import RandomForestClassifier\n","help(RandomForestClassifier)\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"zWOLGNgitWHP","executionInfo":{"status":"ok","timestamp":1743103655329,"user_tz":240,"elapsed":3621,"user":{"displayName":"Vaibhav Gurav","userId":"09440818105492169759"}},"outputId":"3665773a-cba3-4e52-93f2-3d5b687df7ff"},"execution_count":1,"outputs":[{"output_type":"stream","name":"stdout","text":["Help on class RandomForestClassifier in module sklearn.ensemble._forest:\n","\n","class RandomForestClassifier(ForestClassifier)\n"," | RandomForestClassifier(n_estimators=100, *, criterion='gini', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='sqrt', max_leaf_nodes=None, min_impurity_decrease=0.0, bootstrap=True, oob_score=False, n_jobs=None, random_state=None, verbose=0, warm_start=False, class_weight=None, ccp_alpha=0.0, max_samples=None, monotonic_cst=None)\n"," | \n"," | A random forest classifier.\n"," | \n"," | A random forest is a meta estimator that fits a number of decision tree\n"," | classifiers on various sub-samples of the dataset and uses averaging to\n"," | improve the predictive accuracy and control over-fitting.\n"," | Trees in the forest use the best split strategy, i.e. equivalent to passing\n"," | `splitter=\"best\"` to the underlying :class:`~sklearn.tree.DecisionTreeClassifier`.\n"," | The sub-sample size is controlled with the `max_samples` parameter if\n"," | `bootstrap=True` (default), otherwise the whole dataset is used to build\n"," | each tree.\n"," | \n"," | For a comparison between tree-based ensemble models see the example\n"," | :ref:`sphx_glr_auto_examples_ensemble_plot_forest_hist_grad_boosting_comparison.py`.\n"," | \n"," | Read more in the :ref:`User Guide <forest>`.\n"," | \n"," | Parameters\n"," | ----------\n"," | n_estimators : int, default=100\n"," | The number of trees in the forest.\n"," | \n"," | .. versionchanged:: 0.22\n"," | The default value of ``n_estimators`` changed from 10 to 100\n"," | in 0.22.\n"," | \n"," | criterion : {\"gini\", \"entropy\", \"log_loss\"}, default=\"gini\"\n"," | The function to measure the quality of a split. Supported criteria are\n"," | \"gini\" for the Gini impurity and \"log_loss\" and \"entropy\" both for the\n"," | Shannon information gain, see :ref:`tree_mathematical_formulation`.\n"," | Note: This parameter is tree-specific.\n"," | \n"," | max_depth : int, default=None\n"," | The maximum depth of the tree. If None, then nodes are expanded until\n"," | all leaves are pure or until all leaves contain less than\n"," | min_samples_split samples.\n"," | \n"," | min_samples_split : int or float, default=2\n"," | The minimum number of samples required to split an internal node:\n"," | \n"," | - If int, then consider `min_samples_split` as the minimum number.\n"," | - If float, then `min_samples_split` is a fraction and\n"," | `ceil(min_samples_split * n_samples)` are the minimum\n"," | number of samples for each split.\n"," | \n"," | .. versionchanged:: 0.18\n"," | Added float values for fractions.\n"," | \n"," | min_samples_leaf : int or float, default=1\n"," | The minimum number of samples required to be at a leaf node.\n"," | A split point at any depth will only be considered if it leaves at\n"," | least ``min_samples_leaf`` training samples in each of the left and\n"," | right branches. This may have the effect of smoothing the model,\n"," | especially in regression.\n"," | \n"," | - If int, then consider `min_samples_leaf` as the minimum number.\n"," | - If float, then `min_samples_leaf` is a fraction and\n"," | `ceil(min_samples_leaf * n_samples)` are the minimum\n"," | number of samples for each node.\n"," | \n"," | .. versionchanged:: 0.18\n"," | Added float values for fractions.\n"," | \n"," | min_weight_fraction_leaf : float, default=0.0\n"," | The minimum weighted fraction of the sum total of weights (of all\n"," | the input samples) required to be at a leaf node. Samples have\n"," | equal weight when sample_weight is not provided.\n"," | \n"," | max_features : {\"sqrt\", \"log2\", None}, int or float, default=\"sqrt\"\n"," | The number of features to consider when looking for the best split:\n"," | \n"," | - If int, then consider `max_features` features at each split.\n"," | - If float, then `max_features` is a fraction and\n"," | `max(1, int(max_features * n_features_in_))` features are considered at each\n"," | split.\n"," | - If \"sqrt\", then `max_features=sqrt(n_features)`.\n"," | - If \"log2\", then `max_features=log2(n_features)`.\n"," | - If None, then `max_features=n_features`.\n"," | \n"," | .. versionchanged:: 1.1\n"," | The default of `max_features` changed from `\"auto\"` to `\"sqrt\"`.\n"," | \n"," | Note: the search for a split does not stop until at least one\n"," | valid partition of the node samples is found, even if it requires to\n"," | effectively inspect more than ``max_features`` features.\n"," | \n"," | max_leaf_nodes : int, default=None\n"," | Grow trees with ``max_leaf_nodes`` in best-first fashion.\n"," | Best nodes are defined as relative reduction in impurity.\n"," | If None then unlimited number of leaf nodes.\n"," | \n"," | min_impurity_decrease : float, default=0.0\n"," | A node will be split if this split induces a decrease of the impurity\n"," | greater than or equal to this value.\n"," | \n"," | The weighted impurity decrease equation is the following::\n"," | \n"," | N_t / N * (impurity - N_t_R / N_t * right_impurity\n"," | - N_t_L / N_t * left_impurity)\n"," | \n"," | where ``N`` is the total number of samples, ``N_t`` is the number of\n"," | samples at the current node, ``N_t_L`` is the number of samples in the\n"," | left child, and ``N_t_R`` is the number of samples in the right child.\n"," | \n"," | ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n"," | if ``sample_weight`` is passed.\n"," | \n"," | .. versionadded:: 0.19\n"," | \n"," | bootstrap : bool, default=True\n"," | Whether bootstrap samples are used when building trees. If False, the\n"," | whole dataset is used to build each tree.\n"," | \n"," | oob_score : bool or callable, default=False\n"," | Whether to use out-of-bag samples to estimate the generalization score.\n"," | By default, :func:`~sklearn.metrics.accuracy_score` is used.\n"," | Provide a callable with signature `metric(y_true, y_pred)` to use a\n"," | custom metric. Only available if `bootstrap=True`.\n"," | \n"," | n_jobs : int, default=None\n"," | The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n"," | :meth:`decision_path` and :meth:`apply` are all parallelized over the\n"," | trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n"," | context. ``-1`` means using all processors. See :term:`Glossary\n"," | <n_jobs>` for more details.\n"," | \n"," | random_state : int, RandomState instance or None, default=None\n"," | Controls both the randomness of the bootstrapping of the samples used\n"," | when building trees (if ``bootstrap=True``) and the sampling of the\n"," | features to consider when looking for the best split at each node\n"," | (if ``max_features < n_features``).\n"," | See :term:`Glossary <random_state>` for details.\n"," | \n"," | verbose : int, default=0\n"," | Controls the verbosity when fitting and predicting.\n"," | \n"," | warm_start : bool, default=False\n"," | When set to ``True``, reuse the solution of the previous call to fit\n"," | and add more estimators to the ensemble, otherwise, just fit a whole\n"," | new forest. See :term:`Glossary <warm_start>` and\n"," | :ref:`tree_ensemble_warm_start` for details.\n"," | \n"," | class_weight : {\"balanced\", \"balanced_subsample\"}, dict or list of dicts, default=None\n"," | Weights associated with classes in the form ``{class_label: weight}``.\n"," | If not given, all classes are supposed to have weight one. For\n"," | multi-output problems, a list of dicts can be provided in the same\n"," | order as the columns of y.\n"," | \n"," | Note that for multioutput (including multilabel) weights should be\n"," | defined for each class of every column in its own dict. For example,\n"," | for four-class multilabel classification weights should be\n"," | [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n"," | [{1:1}, {2:5}, {3:1}, {4:1}].\n"," | \n"," | The \"balanced\" mode uses the values of y to automatically adjust\n"," | weights inversely proportional to class frequencies in the input data\n"," | as ``n_samples / (n_classes * np.bincount(y))``\n"," | \n"," | The \"balanced_subsample\" mode is the same as \"balanced\" except that\n"," | weights are computed based on the bootstrap sample for every tree\n"," | grown.\n"," | \n"," | For multi-output, the weights of each column of y will be multiplied.\n"," | \n"," | Note that these weights will be multiplied with sample_weight (passed\n"," | through the fit method) if sample_weight is specified.\n"," | \n"," | ccp_alpha : non-negative float, default=0.0\n"," | Complexity parameter used for Minimal Cost-Complexity Pruning. The\n"," | subtree with the largest cost complexity that is smaller than\n"," | ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n"," | :ref:`minimal_cost_complexity_pruning` for details. See\n"," | :ref:`sphx_glr_auto_examples_tree_plot_cost_complexity_pruning.py`\n"," | for an example of such pruning.\n"," | \n"," | .. versionadded:: 0.22\n"," | \n"," | max_samples : int or float, default=None\n"," | If bootstrap is True, the number of samples to draw from X\n"," | to train each base estimator.\n"," | \n"," | - If None (default), then draw `X.shape[0]` samples.\n"," | - If int, then draw `max_samples` samples.\n"," | - If float, then draw `max(round(n_samples * max_samples), 1)` samples. Thus,\n"," | `max_samples` should be in the interval `(0.0, 1.0]`.\n"," | \n"," | .. versionadded:: 0.22\n"," | \n"," | monotonic_cst : array-like of int of shape (n_features), default=None\n"," | Indicates the monotonicity constraint to enforce on each feature.\n"," | - 1: monotonic increase\n"," | - 0: no constraint\n"," | - -1: monotonic decrease\n"," | \n"," | If monotonic_cst is None, no constraints are applied.\n"," | \n"," | Monotonicity constraints are not supported for:\n"," | - multiclass classifications (i.e. when `n_classes > 2`),\n"," | - multioutput classifications (i.e. when `n_outputs_ > 1`),\n"," | - classifications trained on data with missing values.\n"," | \n"," | The constraints hold over the probability of the positive class.\n"," | \n"," | Read more in the :ref:`User Guide <monotonic_cst_gbdt>`.\n"," | \n"," | .. versionadded:: 1.4\n"," | \n"," | Attributes\n"," | ----------\n"," | estimator_ : :class:`~sklearn.tree.DecisionTreeClassifier`\n"," | The child estimator template used to create the collection of fitted\n"," | sub-estimators.\n"," | \n"," | .. versionadded:: 1.2\n"," | `base_estimator_` was renamed to `estimator_`.\n"," | \n"," | estimators_ : list of DecisionTreeClassifier\n"," | The collection of fitted sub-estimators.\n"," | \n"," | classes_ : ndarray of shape (n_classes,) or a list of such arrays\n"," | The classes labels (single output problem), or a list of arrays of\n"," | class labels (multi-output problem).\n"," | \n"," | n_classes_ : int or list\n"," | The number of classes (single output problem), or a list containing the\n"," | number of classes for each output (multi-output problem).\n"," | \n"," | n_features_in_ : int\n"," | Number of features seen during :term:`fit`.\n"," | \n"," | .. versionadded:: 0.24\n"," | \n"," | feature_names_in_ : ndarray of shape (`n_features_in_`,)\n"," | Names of features seen during :term:`fit`. 
Defined only when `X`\n"," | has feature names that are all strings.\n"," | \n"," | .. versionadded:: 1.0\n"," | \n"," | n_outputs_ : int\n"," | The number of outputs when ``fit`` is performed.\n"," | \n"," | feature_importances_ : ndarray of shape (n_features,)\n"," | The impurity-based feature importances.\n"," | The higher, the more important the feature.\n"," | The importance of a feature is computed as the (normalized)\n"," | total reduction of the criterion brought by that feature. It is also\n"," | known as the Gini importance.\n"," | \n"," | Warning: impurity-based feature importances can be misleading for\n"," | high cardinality features (many unique values). See\n"," | :func:`sklearn.inspection.permutation_importance` as an alternative.\n"," | \n"," | oob_score_ : float\n"," | Score of the training dataset obtained using an out-of-bag estimate.\n"," | This attribute exists only when ``oob_score`` is True.\n"," | \n"," | oob_decision_function_ : ndarray of shape (n_samples, n_classes) or (n_samples, n_classes, n_outputs)\n"," | Decision function computed with out-of-bag estimate on the training\n"," | set. If n_estimators is small it might be possible that a data point\n"," | was never left out during the bootstrap. In this case,\n"," | `oob_decision_function_` might contain NaN. This attribute exists\n"," | only when ``oob_score`` is True.\n"," | \n"," | estimators_samples_ : list of arrays\n"," | The subset of drawn samples (i.e., the in-bag samples) for each base\n"," | estimator. Each subset is defined by an array of the indices selected.\n"," | \n"," | .. versionadded:: 1.4\n"," | \n"," | See Also\n"," | --------\n"," | sklearn.tree.DecisionTreeClassifier : A decision tree classifier.\n"," | sklearn.ensemble.ExtraTreesClassifier : Ensemble of extremely randomized\n"," | tree classifiers.\n"," | sklearn.ensemble.HistGradientBoostingClassifier : A Histogram-based Gradient\n"," | Boosting Classification Tree, very fast for big datasets (n_samples >=\n"," | 10_000).\n"," | \n"," | Notes\n"," | -----\n"," | The default values for the parameters controlling the size of the trees\n"," | (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n"," | unpruned trees which can potentially be very large on some data sets. To\n"," | reduce memory consumption, the complexity and size of the trees should be\n"," | controlled by setting those parameter values.\n"," | \n"," | The features are always randomly permuted at each split. Therefore,\n"," | the best found split may vary, even with the same training data,\n"," | ``max_features=n_features`` and ``bootstrap=False``, if the improvement\n"," | of the criterion is identical for several splits enumerated during the\n"," | search of the best split. To obtain a deterministic behaviour during\n"," | fitting, ``random_state`` has to be fixed.\n"," | \n"," | References\n"," | ----------\n"," | .. [1] L. Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32, 2001.\n"," | \n"," | Examples\n"," | --------\n"," | >>> from sklearn.ensemble import RandomForestClassifier\n"," | >>> from sklearn.datasets import make_classification\n"," | >>> X, y = make_classification(n_samples=1000, n_features=4,\n"," | ... n_informative=2, n_redundant=0,\n"," | ... 
random_state=0, shuffle=False)\n"," | >>> clf = RandomForestClassifier(max_depth=2, random_state=0)\n"," | >>> clf.fit(X, y)\n"," | RandomForestClassifier(...)\n"," | >>> print(clf.predict([[0, 0, 0, 0]]))\n"," | [1]\n"," | \n"," | Method resolution order:\n"," | RandomForestClassifier\n"," | ForestClassifier\n"," | sklearn.base.ClassifierMixin\n"," | BaseForest\n"," | sklearn.base.MultiOutputMixin\n"," | sklearn.ensemble._base.BaseEnsemble\n"," | sklearn.base.MetaEstimatorMixin\n"," | sklearn.base.BaseEstimator\n"," | sklearn.utils._estimator_html_repr._HTMLDocumentationLinkMixin\n"," | sklearn.utils._metadata_requests._MetadataRequester\n"," | builtins.object\n"," | \n"," | Methods defined here:\n"," | \n"," | __init__(self, n_estimators=100, *, criterion='gini', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='sqrt', max_leaf_nodes=None, min_impurity_decrease=0.0, bootstrap=True, oob_score=False, n_jobs=None, random_state=None, verbose=0, warm_start=False, class_weight=None, ccp_alpha=0.0, max_samples=None, monotonic_cst=None)\n"," | Initialize self. See help(type(self)) for accurate signature.\n"," | \n"," | set_fit_request(self: sklearn.ensemble._forest.RandomForestClassifier, *, sample_weight: Union[bool, NoneType, str] = '$UNCHANGED$') -> sklearn.ensemble._forest.RandomForestClassifier from sklearn.utils._metadata_requests.RequestMethod.__get__.\n"," | Request metadata passed to the ``fit`` method.\n"," | \n"," | Note that this method is only relevant if\n"," | ``enable_metadata_routing=True`` (see :func:`sklearn.set_config`).\n"," | Please see :ref:`User Guide <metadata_routing>` on how the routing\n"," | mechanism works.\n"," | \n"," | The options for each parameter are:\n"," | \n"," | - ``True``: metadata is requested, and passed to ``fit`` if provided. The request is ignored if metadata is not provided.\n"," | \n"," | - ``False``: metadata is not requested and the meta-estimator will not pass it to ``fit``.\n"," | \n"," | - ``None``: metadata is not requested, and the meta-estimator will raise an error if the user provides it.\n"," | \n"," | - ``str``: metadata should be passed to the meta-estimator with this given alias instead of the original name.\n"," | \n"," | The default (``sklearn.utils.metadata_routing.UNCHANGED``) retains the\n"," | existing request. This allows you to change the request for some\n"," | parameters and not others.\n"," | \n"," | .. versionadded:: 1.3\n"," | \n"," | .. note::\n"," | This method is only relevant if this estimator is used as a\n"," | sub-estimator of a meta-estimator, e.g. used inside a\n"," | :class:`~sklearn.pipeline.Pipeline`. Otherwise it has no effect.\n"," | \n"," | Parameters\n"," | ----------\n"," | sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED\n"," | Metadata routing for ``sample_weight`` parameter in ``fit``.\n"," | \n"," | Returns\n"," | -------\n"," | self : object\n"," | The updated object.\n"," | \n"," | set_score_request(self: sklearn.ensemble._forest.RandomForestClassifier, *, sample_weight: Union[bool, NoneType, str] = '$UNCHANGED$') -> sklearn.ensemble._forest.RandomForestClassifier from sklearn.utils._metadata_requests.RequestMethod.__get__.\n"," | Request metadata passed to the ``score`` method.\n"," | \n"," | Note that this method is only relevant if\n"," | ``enable_metadata_routing=True`` (see :func:`sklearn.set_config`).\n"," | Please see :ref:`User Guide <metadata_routing>` on how the routing\n"," | mechanism works.\n"," | \n"," | The options for each parameter are:\n"," | \n"," | - ``True``: metadata is requested, and passed to ``score`` if provided. The request is ignored if metadata is not provided.\n"," | \n"," | - ``False``: metadata is not requested and the meta-estimator will not pass it to ``score``.\n"," | \n"," | - ``None``: metadata is not requested, and the meta-estimator will raise an error if the user provides it.\n"," | \n"," | - ``str``: metadata should be passed to the meta-estimator with this given alias instead of the original name.\n"," | \n"," | The default (``sklearn.utils.metadata_routing.UNCHANGED``) retains the\n"," | existing request. This allows you to change the request for some\n"," | parameters and not others.\n"," | \n"," | .. versionadded:: 1.3\n"," | \n"," | .. note::\n"," | This method is only relevant if this estimator is used as a\n"," | sub-estimator of a meta-estimator, e.g. used inside a\n"," | :class:`~sklearn.pipeline.Pipeline`. Otherwise it has no effect.\n"," | \n"," | Parameters\n"," | ----------\n"," | sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED\n"," | Metadata routing for ``sample_weight`` parameter in ``score``.\n"," | \n"," | Returns\n"," | -------\n"," | self : object\n"," | The updated object.\n"," | \n"," | ----------------------------------------------------------------------\n"," | Data and other attributes defined here:\n"," | \n"," | __abstractmethods__ = frozenset()\n"," | \n"," | __annotations__ = {'_parameter_constraints': <class 'dict'>}\n"," | \n"," | ----------------------------------------------------------------------\n"," | Methods inherited from ForestClassifier:\n"," | \n"," | __sklearn_tags__(self)\n"," | \n"," | predict(self, X)\n"," | Predict class for X.\n"," | \n"," | The predicted class of an input sample is a vote by the trees in\n"," | the forest, weighted by their probability estimates. That is,\n"," | the predicted class is the one with highest mean probability\n"," | estimate across the trees.\n"," | \n"," | Parameters\n"," | ----------\n"," | X : {array-like, sparse matrix} of shape (n_samples, n_features)\n"," | The input samples. Internally, its dtype will be converted to\n"," | ``dtype=np.float32``. 
If a sparse matrix is provided, it will be\n"," | converted into a sparse ``csr_matrix``.\n"," | \n"," | Returns\n"," | -------\n"," | y : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n"," | The predicted classes.\n"," | \n"," | predict_log_proba(self, X)\n"," | Predict class log-probabilities for X.\n"," | \n"," | The predicted class log-probabilities of an input sample is computed as\n"," | the log of the mean predicted class probabilities of the trees in the\n"," | forest.\n"," | \n"," | Parameters\n"," | ----------\n"," | X : {array-like, sparse matrix} of shape (n_samples, n_features)\n"," | The input samples. Internally, its dtype will be converted to\n"," | ``dtype=np.float32``. If a sparse matrix is provided, it will be\n"," | converted into a sparse ``csr_matrix``.\n"," | \n"," | Returns\n"," | -------\n"," | p : ndarray of shape (n_samples, n_classes), or a list of such arrays\n"," | The class probabilities of the input samples. The order of the\n"," | classes corresponds to that in the attribute :term:`classes_`.\n"," | \n"," | predict_proba(self, X)\n"," | Predict class probabilities for X.\n"," | \n"," | The predicted class probabilities of an input sample are computed as\n"," | the mean predicted class probabilities of the trees in the forest.\n"," | The class probability of a single tree is the fraction of samples of\n"," | the same class in a leaf.\n"," | \n"," | Parameters\n"," | ----------\n"," | X : {array-like, sparse matrix} of shape (n_samples, n_features)\n"," | The input samples. Internally, its dtype will be converted to\n"," | ``dtype=np.float32``. If a sparse matrix is provided, it will be\n"," | converted into a sparse ``csr_matrix``.\n"," | \n"," | Returns\n"," | -------\n"," | p : ndarray of shape (n_samples, n_classes), or a list of such arrays\n"," | The class probabilities of the input samples. The order of the\n"," | classes corresponds to that in the attribute :term:`classes_`.\n"," | \n"," | ----------------------------------------------------------------------\n"," | Methods inherited from sklearn.base.ClassifierMixin:\n"," | \n"," | score(self, X, y, sample_weight=None)\n"," | Return the mean accuracy on the given test data and labels.\n"," | \n"," | In multi-label classification, this is the subset accuracy\n"," | which is a harsh metric since you require for each sample that\n"," | each label set be correctly predicted.\n"," | \n"," | Parameters\n"," | ----------\n"," | X : array-like of shape (n_samples, n_features)\n"," | Test samples.\n"," | \n"," | y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n"," | True labels for `X`.\n"," | \n"," | sample_weight : array-like of shape (n_samples,), default=None\n"," | Sample weights.\n"," | \n"," | Returns\n"," | -------\n"," | score : float\n"," | Mean accuracy of ``self.predict(X)`` w.r.t. `y`.\n"," | \n"," | ----------------------------------------------------------------------\n"," | Data descriptors inherited from sklearn.base.ClassifierMixin:\n"," | \n"," | __dict__\n"," | dictionary for instance variables\n"," | \n"," | __weakref__\n"," | list of weak references to the object\n"," | \n"," | ----------------------------------------------------------------------\n"," | Methods inherited from BaseForest:\n"," | \n"," | apply(self, X)\n"," | Apply trees in the forest to X, return leaf indices.\n"," | \n"," | Parameters\n"," | ----------\n"," | X : {array-like, sparse matrix} of shape (n_samples, n_features)\n"," | The input samples. 
Internally, its dtype will be converted to\n"," | ``dtype=np.float32``. If a sparse matrix is provided, it will be\n"," | converted into a sparse ``csr_matrix``.\n"," | \n"," | Returns\n"," | -------\n"," | X_leaves : ndarray of shape (n_samples, n_estimators)\n"," | For each datapoint x in X and for each tree in the forest,\n"," | return the index of the leaf x ends up in.\n"," | \n"," | decision_path(self, X)\n"," | Return the decision path in the forest.\n"," | \n"," | .. versionadded:: 0.18\n"," | \n"," | Parameters\n"," | ----------\n"," | X : {array-like, sparse matrix} of shape (n_samples, n_features)\n"," | The input samples. Internally, its dtype will be converted to\n"," | ``dtype=np.float32``. If a sparse matrix is provided, it will be\n"," | converted into a sparse ``csr_matrix``.\n"," | \n"," | Returns\n"," | -------\n"," | indicator : sparse matrix of shape (n_samples, n_nodes)\n"," | Return a node indicator matrix where non zero elements indicates\n"," | that the samples goes through the nodes. The matrix is of CSR\n"," | format.\n"," | \n"," | n_nodes_ptr : ndarray of shape (n_estimators + 1,)\n"," | The columns from indicator[n_nodes_ptr[i]:n_nodes_ptr[i+1]]\n"," | gives the indicator value for the i-th estimator.\n"," | \n"," | fit(self, X, y, sample_weight=None)\n"," | Build a forest of trees from the training set (X, y).\n"," | \n"," | Parameters\n"," | ----------\n"," | X : {array-like, sparse matrix} of shape (n_samples, n_features)\n"," | The training input samples. Internally, its dtype will be converted\n"," | to ``dtype=np.float32``. If a sparse matrix is provided, it will be\n"," | converted into a sparse ``csc_matrix``.\n"," | \n"," | y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n"," | The target values (class labels in classification, real numbers in\n"," | regression).\n"," | \n"," | sample_weight : array-like of shape (n_samples,), default=None\n"," | Sample weights. If None, then samples are equally weighted. Splits\n"," | that would create child nodes with net zero or negative weight are\n"," | ignored while searching for a split in each node. In the case of\n"," | classification, splits are also ignored if they would result in any\n"," | single class carrying a negative weight in either child node.\n"," | \n"," | Returns\n"," | -------\n"," | self : object\n"," | Fitted estimator.\n"," | \n"," | ----------------------------------------------------------------------\n"," | Readonly properties inherited from BaseForest:\n"," | \n"," | estimators_samples_\n"," | The subset of drawn samples for each base estimator.\n"," | \n"," | Returns a dynamically generated list of indices identifying\n"," | the samples used for fitting each member of the ensemble, i.e.,\n"," | the in-bag samples.\n"," | \n"," | Note: the list is re-created at each call to the property in order\n"," | to reduce the object memory footprint by not storing the sampling\n"," | data. Thus fetching the property may be slower than expected.\n"," | \n"," | feature_importances_\n"," | The impurity-based feature importances.\n"," | \n"," | The higher, the more important the feature.\n"," | The importance of a feature is computed as the (normalized)\n"," | total reduction of the criterion brought by that feature. It is also\n"," | known as the Gini importance.\n"," | \n"," | Warning: impurity-based feature importances can be misleading for\n"," | high cardinality features (many unique values). 
See\n"," | :func:`sklearn.inspection.permutation_importance` as an alternative.\n"," | \n"," | Returns\n"," | -------\n"," | feature_importances_ : ndarray of shape (n_features,)\n"," | The values of this array sum to 1, unless all trees are single node\n"," | trees consisting of only the root node, in which case it will be an\n"," | array of zeros.\n"," | \n"," | ----------------------------------------------------------------------\n"," | Methods inherited from sklearn.ensemble._base.BaseEnsemble:\n"," | \n"," | __getitem__(self, index)\n"," | Return the index'th estimator in the ensemble.\n"," | \n"," | __iter__(self)\n"," | Return iterator over estimators in the ensemble.\n"," | \n"," | __len__(self)\n"," | Return the number of estimators in the ensemble.\n"," | \n"," | ----------------------------------------------------------------------\n"," | Methods inherited from sklearn.base.BaseEstimator:\n"," | \n"," | __getstate__(self)\n"," | Helper for pickle.\n"," | \n"," | __repr__(self, N_CHAR_MAX=700)\n"," | Return repr(self).\n"," | \n"," | __setstate__(self, state)\n"," | \n"," | __sklearn_clone__(self)\n"," | \n"," | get_params(self, deep=True)\n"," | Get parameters for this estimator.\n"," | \n"," | Parameters\n"," | ----------\n"," | deep : bool, default=True\n"," | If True, will return the parameters for this estimator and\n"," | contained subobjects that are estimators.\n"," | \n"," | Returns\n"," | -------\n"," | params : dict\n"," | Parameter names mapped to their values.\n"," | \n"," | set_params(self, **params)\n"," | Set the parameters of this estimator.\n"," | \n"," | The method works on simple estimators as well as on nested objects\n"," | (such as :class:`~sklearn.pipeline.Pipeline`). The latter have\n"," | parameters of the form ``__`` so that it's\n"," | possible to update each component of a nested object.\n"," | \n"," | Parameters\n"," | ----------\n"," | **params : dict\n"," | Estimator parameters.\n"," | \n"," | Returns\n"," | -------\n"," | self : estimator instance\n"," | Estimator instance.\n"," | \n"," | ----------------------------------------------------------------------\n"," | Methods inherited from sklearn.utils._metadata_requests._MetadataRequester:\n"," | \n"," | get_metadata_routing(self)\n"," | Get metadata routing of this object.\n"," | \n"," | Please check :ref:`User Guide ` on how the routing\n"," | mechanism works.\n"," | \n"," | Returns\n"," | -------\n"," | routing : MetadataRequest\n"," | A :class:`~sklearn.utils.metadata_routing.MetadataRequest` encapsulating\n"," | routing information.\n"," | \n"," | ----------------------------------------------------------------------\n"," | Class methods inherited from sklearn.utils._metadata_requests._MetadataRequester:\n"," | \n"," | __init_subclass__(**kwargs)\n"," | Set the ``set_{method}_request`` methods.\n"," | \n"," | This uses PEP-487 [1]_ to set the ``set_{method}_request`` methods. It\n"," | looks for the information available in the set default values which are\n"," | set using ``__metadata_request__*`` class attributes, or inferred\n"," | from method signatures.\n"," | \n"," | The ``__metadata_request__*`` class attributes are used when a method\n"," | does not explicitly accept a metadata through its arguments or if the\n"," | developer would like to specify a request value for those metadata\n"," | which are different from the default ``None``.\n"," | \n"," | References\n"," | ----------\n"," | .. [1] https://www.python.org/dev/peps/pep-0487\n","\n"]}]}]}