/*-
 * Copyright (c) 2023 NVIDIA corporation & affiliates.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include "opt_ipsec.h"

#include <sys/types.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <net/pfkeyv2.h>
#include <netipsec/key_var.h>
#include <netipsec/keydb.h>
#include <netipsec/ipsec.h>
#include <netipsec/xform.h>
#include <netipsec/ipsec_offload.h>
#include <dev/mlx5/fs.h>
#include <dev/mlx5/mlx5_en/en.h>
#include <dev/mlx5/qp.h>
#include <dev/mlx5/mlx5_accel/ipsec.h>
#include <dev/mlx5/mlx5_core/fs_core.h>
#include <dev/mlx5/mlx5_core/fs_chains.h>

/*
 * TX tables are organized differently for Ethernet and for RoCE:
 *
 *                       +=========+
 *       Ethernet Tx     | SA KSPI | match
 * --------------------->|Flowtable|----->+         +
 *                       |         |\     |        / \
 *                       +=========+ |    |       /   \         +=========+     +=========+
 *                              miss |    |      /     \        |  Status |     |         |
 *                      DROP<--------+    |---->|Encrypt|------>|Flowtable|---->|  TX NS  |
 *                                        |      \     /        |         |     |         |
 *                                        |       \   /         +=========+     +=========+
 *       +=========+      +=========+     |        \ /               |
 *  RoCE |  Policy | match|SA ReqId |match|         +                |
 *  Tx   |Flowtable|----->|Flowtable|---->+                          |
 *  ---->|IP header|      |ReqId+IP |                                |
 *       |         |      | header  |--------------------------------+
 *       +=========+      +=========+         miss                   |
 *            |                                                      |
 *            |                   miss                               |
 *            +-------------------------------------------------------
 *
 *                                                                                  +=========+
 *                                                                                  |   RDMA  |
 *                                                                                  |Flowtable|
 *                                                                                  |         |
 * Rx Tables and rules:                                                             +=========+
 *                                             +                                        /
 *       +=========+      +=========+         / \         +=========+      +=========+ /match
 *       |  Policy |      |   SA    |        /   \        |  Status |      |  RoCE   |/
 *  ---->|Flowtable| match|Flowtable| match /     \       |Flowtable|----->|Flowtable|
 *       |IP header|----->|IP header|----->|Decrypt|----->|         |      | Roce V2 |
 *       |         |      |+ESP+SPI |       \     /       |         |      | UDP port|\
 *       +=========+      +=========+        \   /        +=========+      +=========+ \miss
 *             |               |              \ /                                       \
 *             |               |               +                                      +=========+
 *             |     miss      |          miss                                       | Ethernet|
 *             +--------------->---------------------------------------------------->|  RX NS  |
 *                                                                                   |         |
 *                                                                                   +=========+
 *
 */

#define NUM_IPSEC_FTE BIT(15)
#define IPSEC_TUNNEL_DEFAULT_TTL 0x40

struct mlx5e_ipsec_fc {
	struct mlx5_fc *cnt;
	struct mlx5_fc *drop;
};

struct mlx5e_ipsec_ft {
	struct mutex mutex; /* Protect changes to this struct */
	struct mlx5_flow_table *pol;
	struct mlx5_flow_table *sa_kspi;
	struct mlx5_flow_table *sa;
	struct mlx5_flow_table *status;
	u32 refcnt;
};

struct mlx5e_ipsec_tx_roce {
	struct mlx5_flow_group *g;
	struct mlx5_flow_table *ft;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_namespace *ns;
};

struct mlx5e_ipsec_miss {
	struct mlx5_flow_group *group;
	struct mlx5_flow_handle *rule;
};

struct mlx5e_ipsec_tx {
	struct mlx5e_ipsec_ft ft;
	struct mlx5e_ipsec_miss pol;
	struct mlx5e_ipsec_miss kspi_miss;
	struct mlx5e_ipsec_rule status;
	struct mlx5e_ipsec_rule kspi_bypass_rule; /*rule for IPSEC bypass*/
	struct mlx5_flow_namespace *ns;
	struct mlx5e_ipsec_fc *fc;
	struct mlx5_fs_chains *chains;
	struct mlx5e_ipsec_tx_roce roce;
};

struct mlx5e_ipsec_rx_roce {
	struct mlx5_flow_group *g;
	struct mlx5_flow_table *ft;
	struct mlx5_flow_handle *rule;
	struct mlx5e_ipsec_miss roce_miss;

	struct mlx5_flow_table *ft_rdma;
	struct mlx5_flow_namespace *ns_rdma;
};

struct mlx5e_ipsec_rx_ip_type {
	struct mlx5_flow_table *ft;
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_handle *ipv4_rule;
	struct mlx5_flow_handle *ipv6_rule;
	struct mlx5e_ipsec_miss miss;
};

struct mlx5e_ipsec_rx {
	struct mlx5e_ipsec_ft ft;
	struct mlx5e_ipsec_miss pol;
	struct mlx5e_ipsec_miss sa;
	struct mlx5e_ipsec_rule status;
	struct mlx5_flow_namespace *ns;
	struct mlx5e_ipsec_fc *fc;
	struct mlx5_fs_chains *chains;
	struct mlx5e_ipsec_rx_roce roce;
};

static void setup_fte_reg_a_with_tag(struct mlx5_flow_spec *spec,
                                     u16 kspi);
static void setup_fte_reg_a_no_tag(struct mlx5_flow_spec *spec);

static void setup_fte_no_frags(struct mlx5_flow_spec *spec)
{
	/* Non fragmented */
	spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;

	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.frag);
	MLX5_SET(fte_match_param, spec->match_value, outer_headers.frag, 0);
}

static void setup_fte_esp(struct mlx5_flow_spec *spec)
{
	/* ESP header */
	spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;

	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_ESP);
}

static void setup_fte_spi(struct mlx5_flow_spec *spec, u32 spi, bool encap)
{
	/* SPI number */
	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;

	if (encap) {
		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters.inner_esp_spi);
		MLX5_SET(fte_match_param, spec->match_value, misc_parameters.inner_esp_spi, spi);
	} else {
		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters.outer_esp_spi);
		MLX5_SET(fte_match_param, spec->match_value, misc_parameters.outer_esp_spi, spi);
	}
}

static void
setup_fte_vid(struct mlx5_flow_spec *spec, u16 vid)
{
	/* virtual lan tag */
	spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;

	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
	    outer_headers.cvlan_tag);
	MLX5_SET(fte_match_param, spec->match_value,
	    outer_headers.cvlan_tag, 1);
	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
	    outer_headers.first_vid);
	MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid,
	    vid);
}

static void
clear_fte_vid(struct mlx5_flow_spec *spec)
{
	MLX5_SET(fte_match_param, spec->match_criteria,
	    outer_headers.cvlan_tag, 0);
	MLX5_SET(fte_match_param, spec->match_value,
	    outer_headers.cvlan_tag, 0);
	MLX5_SET(fte_match_param, spec->match_criteria,
	    outer_headers.first_vid, 0);
	MLX5_SET(fte_match_param, spec->match_value,
	    outer_headers.first_vid, 0);
}

static void
setup_fte_no_vid(struct mlx5_flow_spec *spec)
{
	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
	    outer_headers.cvlan_tag);
	MLX5_SET(fte_match_param, spec->match_value,
	    outer_headers.cvlan_tag, 0);
}

static struct mlx5_fs_chains *
ipsec_chains_create(struct mlx5_core_dev *mdev, struct mlx5_flow_table *miss_ft,
		    enum mlx5_flow_namespace_type ns, int base_prio,
		    int base_level, struct mlx5_flow_table **root_ft)
{
	struct mlx5_chains_attr attr = {};
	struct mlx5_fs_chains *chains;
	struct mlx5_flow_table *ft;
	int err;

	attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
		     MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
	attr.max_grp_num = 2;
	attr.default_ft = miss_ft;
	attr.ns = ns;
	attr.fs_base_prio = base_prio;
	attr.fs_base_level = base_level;
	chains = mlx5_chains_create(mdev, &attr);
	if (IS_ERR(chains))
		return chains;

	/* Create chain 0, prio 1, level 0 to connect chains to prev in fs_core */
	ft = mlx5_chains_get_table(chains, 0, 1, 0);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto err_chains_get;
	}

	*root_ft = ft;
	return chains;

err_chains_get:
	mlx5_chains_destroy(chains);
	return ERR_PTR(err);
}

static void ipsec_chains_destroy(struct mlx5_fs_chains *chains)
{
	mlx5_chains_put_table(chains, 0, 1, 0);
	mlx5_chains_destroy(chains);
}

static struct mlx5_flow_table *
ipsec_chains_get_table(struct mlx5_fs_chains *chains, u32 prio)
{
	return mlx5_chains_get_table(chains, 0, prio + 1, 0);
}

static void ipsec_chains_put_table(struct mlx5_fs_chains *chains, u32 prio)
{
	mlx5_chains_put_table(chains, 0, prio + 1, 0);
}

static struct mlx5_flow_table *ipsec_rx_ft_create(struct mlx5_flow_namespace *ns,
						  int level, int prio,
						  int max_num_groups)
{
	struct mlx5_flow_table_attr ft_attr = {};

	ft_attr.max_fte = NUM_IPSEC_FTE;
	ft_attr.level = level;
	ft_attr.prio = prio;
	ft_attr.autogroup.max_num_groups = max_num_groups;
	ft_attr.autogroup.num_reserved_entries = 1;

	return mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
}

static int ipsec_miss_create(struct mlx5_core_dev *mdev,
			     struct mlx5_flow_table *ft,
			     struct mlx5e_ipsec_miss *miss,
			     struct mlx5_flow_destination *dest)
{
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *spec;
	u32 *flow_group_in;
	int err = 0;

	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!flow_group_in || !spec) {
		err = -ENOMEM;
		goto out;
	}

	/* Create miss_group */
	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1);
	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1);
	miss->group = mlx5_create_flow_group(ft, flow_group_in);
	if (IS_ERR(miss->group)) {
		err = PTR_ERR(miss->group);
		mlx5_core_err(mdev, "fail to create IPsec miss_group err=%d\n",
			      err);
		goto out;
	}

	if (dest)
		flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	else
		flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
	/* Create miss rule */
	miss->rule = mlx5_add_flow_rules(ft, NULL, &flow_act, dest, 1);
	if (IS_ERR(miss->rule)) {
		mlx5_destroy_flow_group(miss->group);
		err = PTR_ERR(miss->rule);
		mlx5_core_err(mdev, "fail to create IPsec miss_rule err=%d\n",
			      err);
		goto out;
	}
out:
	kvfree(flow_group_in);
	kvfree(spec);
	return err;
}

static int setup_modify_header(struct mlx5_core_dev *mdev, u32 val, u8 dir,
                               struct mlx5_flow_act *flow_act)
{
        u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
        enum mlx5_flow_namespace_type ns_type;
        struct mlx5_modify_hdr *modify_hdr;

        MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
        switch (dir) {
        case IPSEC_DIR_INBOUND:
                MLX5_SET(set_action_in, action, field,
                         MLX5_ACTION_IN_FIELD_METADATA_REG_B);
                ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
                break;
        case IPSEC_DIR_OUTBOUND:
                MLX5_SET(set_action_in, action, field,
                         MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
                ns_type = MLX5_FLOW_NAMESPACE_EGRESS;
                break;
        default:
                return -EINVAL;
        }

        MLX5_SET(set_action_in, action, data, val);
        MLX5_SET(set_action_in, action, offset, 0);
        MLX5_SET(set_action_in, action, length, 32);

        modify_hdr = mlx5_modify_header_alloc(mdev, ns_type, 1, action);
        if (IS_ERR(modify_hdr)) {
                mlx5_core_err(mdev, "Failed to allocate modify_header %ld\n",
                              PTR_ERR(modify_hdr));
                return PTR_ERR(modify_hdr);
        }

        flow_act->modify_hdr = modify_hdr;
        flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
        return 0;
}

static int
setup_pkt_transport_reformat(struct mlx5_accel_esp_xfrm_attrs *attrs,
			     struct mlx5_pkt_reformat_params *reformat_params)
{
	struct udphdr *udphdr;
	size_t bfflen = 16;
	char *reformatbf;
	__be32 spi;
	void *hdr;

	if (attrs->family == AF_INET) {
		if (attrs->encap)
			reformat_params->type = MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV4;
		else
			reformat_params->type = MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4;
	} else {
		if (attrs->encap)
			reformat_params->type =
			    MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV6;
		else
			reformat_params->type =
			    MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6;
	}

	if (attrs->encap)
		bfflen += sizeof(*udphdr);
	reformatbf = kzalloc(bfflen, GFP_KERNEL);
	if (!reformatbf)
		return -ENOMEM;

	hdr = reformatbf;
	if (attrs->encap) {
		udphdr = (struct udphdr *)reformatbf;
		udphdr->uh_sport = attrs->sport;
		udphdr->uh_dport = attrs->dport;
		hdr += sizeof(*udphdr);
	}

	/* convert to network format */
	spi = htonl(attrs->spi);
	memcpy(hdr, &spi, 4);

	reformat_params->param_0 = attrs->authsize;
	reformat_params->size = bfflen;
	reformat_params->data = reformatbf;

	return 0;
}

static int setup_pkt_reformat(struct mlx5_core_dev *mdev,
			      struct mlx5_accel_esp_xfrm_attrs *attrs,
			      struct mlx5_flow_act *flow_act)
{
	enum mlx5_flow_namespace_type ns_type = MLX5_FLOW_NAMESPACE_EGRESS;
	struct mlx5_pkt_reformat_params reformat_params = {};
	struct mlx5_pkt_reformat *pkt_reformat;
	int ret;

	if (attrs->dir == IPSEC_DIR_INBOUND) {
		if (attrs->encap)
			reformat_params.type = MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT_OVER_UDP;
		else
			reformat_params.type = MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT;
		ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
		goto cmd;
	}

	ret = setup_pkt_transport_reformat(attrs, &reformat_params);
	if (ret)
		return ret;
cmd:
	pkt_reformat =
		mlx5_packet_reformat_alloc(mdev, &reformat_params, ns_type);
	if (reformat_params.data)
		kfree(reformat_params.data);
	if (IS_ERR(pkt_reformat))
		return PTR_ERR(pkt_reformat);

	flow_act->pkt_reformat = pkt_reformat;
	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
	return 0;
}

static void setup_fte_addr4(struct mlx5_flow_spec *spec, __be32 *saddr,
                            __be32 *daddr)
{
        spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;

        MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
        MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, 4);

        memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
                            outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), saddr, 4);
        memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
                            outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), daddr, 4);
        MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
                         outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
        MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
                         outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
}

static void setup_fte_addr6(struct mlx5_flow_spec *spec, __be32 *saddr,
                            __be32 *daddr)
{
        spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;

        MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
        MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, 6);

        memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
                            outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), saddr, 16);
        memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
                            outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), daddr, 16);
        memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                            outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), 0xff, 16);
        memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                            outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 0xff, 16);
}

static void
setup_fte_ip_version(struct mlx5_flow_spec *spec, u8 family)
{
        spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;

        MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
        MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
                 family == AF_INET ? 4 : 6);
}

static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
{
	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
	struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
	struct mlx5_flow_destination dest[2] = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	struct mlx5e_ipsec_rx *rx;
	struct mlx5_fc *counter;
	int err;

	rx = (attrs->family == AF_INET) ? ipsec->rx_ipv4 : ipsec->rx_ipv6;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	if (!attrs->drop) {
		err = setup_modify_header(mdev, sa_entry->kspi | BIT(31), IPSEC_DIR_INBOUND,
					  &flow_act);
		if (err)
			goto err_mod_header;
	}

	err = setup_pkt_reformat(mdev, attrs, &flow_act);
	if (err)
		goto err_pkt_reformat;

	counter = mlx5_fc_create(mdev, false);
	if (IS_ERR(counter)) {
		err = PTR_ERR(counter);
		goto err_add_cnt;
	}

	flow_act.crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC;
	flow_act.crypto.op = MLX5_FLOW_ACT_CRYPTO_OP_DECRYPT;
	flow_act.crypto.obj_id = sa_entry->ipsec_obj_id;
	flow_act.flags |= FLOW_ACT_NO_APPEND;

	flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT |
		MLX5_FLOW_CONTEXT_ACTION_COUNT;

	if (attrs->drop)
		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
	else
		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;

	dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	dest[0].ft = rx->ft.status;
	dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	dest[1].counter_id = mlx5_fc_id(counter);

	if (attrs->family == AF_INET)
		setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4);
	else
		setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6);

	if (!attrs->encap)
		setup_fte_esp(spec);

	setup_fte_spi(spec, attrs->spi, attrs->encap);
	setup_fte_no_frags(spec);

	if (sa_entry->vid != VLAN_NONE)
		setup_fte_vid(spec, sa_entry->vid);
	else
		setup_fte_no_vid(spec);

	rule = mlx5_add_flow_rules(rx->ft.sa, spec, &flow_act, dest, 2);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		mlx5_core_err(mdev, "fail to add RX ipsec rule err=%d\n", err);
		goto err_add_flow;
	}
	ipsec_rule->rule = rule;

	/* Add another rule for zero vid */
	if (sa_entry->vid == VLAN_NONE) {
		clear_fte_vid(spec);
		setup_fte_vid(spec, 0);
		rule = mlx5_add_flow_rules(rx->ft.sa, spec, &flow_act, dest, 2);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_err(mdev,
			    "fail to add RX ipsec zero vid rule err=%d\n",
			    err);
			goto err_add_flow;
		}
		ipsec_rule->vid_zero_rule = rule;
	}

	kvfree(spec);
	ipsec_rule->fc = counter;
	ipsec_rule->modify_hdr = flow_act.modify_hdr;
	ipsec_rule->pkt_reformat = flow_act.pkt_reformat;
	return 0;

err_add_flow:
	mlx5_fc_destroy(mdev, counter);
	if (ipsec_rule->rule != NULL)
		mlx5_del_flow_rules(&ipsec_rule->rule);
err_add_cnt:
	mlx5_packet_reformat_dealloc(mdev, flow_act.pkt_reformat);
err_pkt_reformat:
	if (flow_act.modify_hdr != NULL)
		mlx5_modify_header_dealloc(mdev, flow_act.modify_hdr);
err_mod_header:
	kvfree(spec);

	return err;
}

static struct mlx5_flow_table *ipsec_tx_ft_create(struct mlx5_flow_namespace *ns,
						  int level, int prio,
						  int max_num_groups)
{
	struct mlx5_flow_table_attr ft_attr = {};

        ft_attr.autogroup.num_reserved_entries = 1;
        ft_attr.autogroup.max_num_groups = max_num_groups;
        ft_attr.max_fte = NUM_IPSEC_FTE;
        ft_attr.level = level;
        ft_attr.prio = prio;

	return mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
}

static int ipsec_counter_rule_tx(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx)
{
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_handle *fte;
	int err;

	/* create fte */
	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_COUNT |
		MLX5_FLOW_CONTEXT_ACTION_ALLOW;

	dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	dest.counter_id = mlx5_fc_id(tx->fc->cnt);
	fte = mlx5_add_flow_rules(tx->ft.status, NULL, &flow_act, &dest, 1);
	if (IS_ERR_OR_NULL(fte)) {
		err = PTR_ERR(fte);
		mlx5_core_err(mdev, "Fail to add ipsec tx counter rule err=%d\n", err);
		goto err_rule;
	}

	tx->status.rule = fte;
	return 0;

err_rule:
	return err;
}

static void tx_destroy_roce(struct mlx5e_ipsec_tx *tx) {
	if (!tx->roce.ft)
		return;

	mlx5_del_flow_rules(&tx->roce.rule);
	mlx5_destroy_flow_group(tx->roce.g);
	mlx5_destroy_flow_table(tx->roce.ft);
	tx->roce.ft = NULL;
}

/* IPsec TX flow steering */
static void tx_destroy(struct mlx5e_ipsec_tx *tx)
{
	tx_destroy_roce(tx);
	if (tx->chains) {
		ipsec_chains_destroy(tx->chains);
	} else {
		mlx5_del_flow_rules(&tx->pol.rule);
		mlx5_destroy_flow_group(tx->pol.group);
		mlx5_destroy_flow_table(tx->ft.pol);
	}
	mlx5_destroy_flow_table(tx->ft.sa);
	mlx5_del_flow_rules(&tx->kspi_miss.rule);
	mlx5_destroy_flow_group(tx->kspi_miss.group);
	mlx5_del_flow_rules(&tx->kspi_bypass_rule.rule);
	mlx5_del_flow_rules(&tx->kspi_bypass_rule.kspi_rule);
	mlx5_destroy_flow_table(tx->ft.sa_kspi);
	mlx5_del_flow_rules(&tx->status.rule);
	mlx5_destroy_flow_table(tx->ft.status);
}

static int ipsec_tx_roce_rule_setup(struct mlx5_core_dev *mdev,
				    struct mlx5e_ipsec_tx *tx)
{
	struct mlx5_flow_destination dst = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_handle *rule;
	int err = 0;

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	dst.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE;
	dst.ft = tx->ft.pol;
	rule = mlx5_add_flow_rules(tx->roce.ft, NULL, &flow_act, &dst, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		mlx5_core_err(mdev, "Fail to add TX roce ipsec rule err=%d\n",
			      err);
		goto out;
	}
	tx->roce.rule = rule;

out:
	return err;
}

static int ipsec_tx_create_roce(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *g;
	int ix = 0;
	int err;
	u32 *in;

	if (!tx->roce.ns)
		return -EOPNOTSUPP;

	in = kvzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	ft_attr.max_fte = 1;
	ft = mlx5_create_flow_table(tx->roce.ns, &ft_attr);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		mlx5_core_err(mdev, "Fail to create ipsec tx roce ft err=%d\n",
			      err);
		goto fail_table;
	}
	tx->roce.ft = ft;

	MLX5_SET_CFG(in, start_flow_index, ix);
	ix += 1;
	MLX5_SET_CFG(in, end_flow_index, ix - 1);
	g = mlx5_create_flow_group(ft, in);
	if (IS_ERR(g)) {
		err = PTR_ERR(g);
		mlx5_core_err(mdev, "Fail to create ipsec tx roce group err=%d\n",
			      err);
		goto fail_group;
	}
	tx->roce.g = g;

	err = ipsec_tx_roce_rule_setup(mdev, tx);
	if (err) {
		mlx5_core_err(mdev, "Fail to create RoCE IPsec tx rules err=%d\n", err);
		goto fail_rule;
	}

	kvfree(in);
	return 0;

fail_rule:
	mlx5_destroy_flow_group(tx->roce.g);
fail_group:
	mlx5_destroy_flow_table(tx->roce.ft);
	tx->roce.ft = NULL;
fail_table:
	kvfree(in);
	return err;
}

/*
 * Setting a rule in KSPI table for values that should bypass IPSEC.
 *
 * mdev - mlx5 core device
 * tx - IPSEC TX
 * return - 0 for success errno for failure
 */
static int tx_create_kspi_bypass_rules(struct mlx5_core_dev *mdev,
                                       struct mlx5e_ipsec_tx *tx)
{
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_act flow_act_kspi = {};
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	int err;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	dest.ft = tx->ft.status;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	flow_act_kspi.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;

	setup_fte_reg_a_with_tag(spec, IPSEC_ACCEL_DRV_SPI_BYPASS);
	rule = mlx5_add_flow_rules(tx->ft.sa_kspi, spec, &flow_act_kspi,
								&dest, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		mlx5_core_err(mdev, "Fail to add ipsec kspi bypass rule err=%d\n",
                      err);
		goto err_add_kspi_rule;
	}
	tx->kspi_bypass_rule.kspi_rule = rule;

	/* set the rule for packets withoiut ipsec tag. */
	flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	memset(spec, 0, sizeof(*spec));
	setup_fte_reg_a_no_tag(spec);
	rule = mlx5_add_flow_rules(tx->ft.sa_kspi, spec, &flow_act, &dest, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		mlx5_core_err(mdev, "Fail to add ipsec kspi bypass rule err=%d\n", err);
		goto err_add_rule;
	}
	tx->kspi_bypass_rule.rule = rule;

	kvfree(spec);
	return 0;
err_add_rule:
	mlx5_del_flow_rules(&tx->kspi_bypass_rule.kspi_rule);
err_add_kspi_rule:
	kvfree(spec);
	return err;
}


static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx)
{
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_table *ft;
	int err;

	/*
	 *  Tx flow is different for ethernet traffic then for RoCE packets
	 *  For Ethernet packets we start in SA KSPI table that matches KSPI of SA rule
	 *  to the KSPI in the packet metadata
	 *  For RoCE traffic we start in Policy table, then move to SA table
	 *  which matches either reqid of the SA rule to reqid reported by policy table
	 *  or ip header fields of SA to the packet IP header fields.
	 *  Tables are ordered by their level so we set kspi
	 *  with level 0 to have it first one for ethernet traffic.
	 *  For RoCE the RoCE TX table direct the packets to policy table explicitly
	 */
	ft = ipsec_tx_ft_create(tx->ns, 0, 0, 4);
	if (IS_ERR(ft))
		return PTR_ERR(ft);
	tx->ft.sa_kspi = ft;

	ft = ipsec_tx_ft_create(tx->ns, 2, 0, 4);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto err_reqid_ft;
	}
	tx->ft.sa = ft;

	if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO) {
		tx->chains = ipsec_chains_create(
				mdev, tx->ft.sa, MLX5_FLOW_NAMESPACE_EGRESS_IPSEC, 0, 1,
				&tx->ft.pol);
		if (IS_ERR(tx->chains)) {
			err = PTR_ERR(tx->chains);
			goto err_pol_ft;
		}
	} else {
		ft = ipsec_tx_ft_create(tx->ns, 1, 0, 2);
		if (IS_ERR(ft)) {
			err = PTR_ERR(ft);
			goto err_pol_ft;
		}
		tx->ft.pol = ft;
		dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		dest.ft = tx->ft.sa;
		err = ipsec_miss_create(mdev, tx->ft.pol, &tx->pol, &dest);
		if (err)
			goto err_pol_miss;
	}

	ft = ipsec_tx_ft_create(tx->ns, 2, 0, 1);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto err_status_ft;
	}
	tx->ft.status = ft;

	/* set miss rule for kspi table with drop action*/
	err = ipsec_miss_create(mdev, tx->ft.sa_kspi, &tx->kspi_miss, NULL);
	if (err)
		goto err_kspi_miss;

	err = tx_create_kspi_bypass_rules(mdev, tx);
	if (err)
		goto err_kspi_rule;

	err = ipsec_counter_rule_tx(mdev, tx);
	if (err)
		goto err_status_rule;

	err = ipsec_tx_create_roce(mdev, tx);
	if (err)
		goto err_counter_rule;

	return 0;

err_counter_rule:
	mlx5_del_flow_rules(&tx->status.rule);
err_status_rule:
	mlx5_del_flow_rules(&tx->kspi_bypass_rule.rule);
	mlx5_del_flow_rules(&tx->kspi_bypass_rule.kspi_rule);
err_kspi_rule:
	mlx5_destroy_flow_table(tx->ft.status);
err_status_ft:
	if (tx->chains) {
		ipsec_chains_destroy(tx->chains);
	} else {
		mlx5_del_flow_rules(&tx->pol.rule);
		mlx5_destroy_flow_group(tx->pol.group);
	}
err_pol_miss:
	if (!tx->chains)
		mlx5_destroy_flow_table(tx->ft.pol);
err_pol_ft:
	mlx5_del_flow_rules(&tx->kspi_miss.rule);
	mlx5_destroy_flow_group(tx->kspi_miss.group);
err_kspi_miss:
	mlx5_destroy_flow_table(tx->ft.sa);
err_reqid_ft:
	mlx5_destroy_flow_table(tx->ft.sa_kspi);
	return err;
}

static int tx_get(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
		  struct mlx5e_ipsec_tx *tx)
{
	int err;

	if (tx->ft.refcnt)
		goto skip;

	err = tx_create(mdev, tx);
	if (err)
		return err;

skip:
	tx->ft.refcnt++;
	return 0;
}

static void tx_put(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx)
{
	if (--tx->ft.refcnt)
		return;

	tx_destroy(tx);
}

static struct mlx5e_ipsec_tx *tx_ft_get(struct mlx5_core_dev *mdev,
					struct mlx5e_ipsec *ipsec)
{
	struct mlx5e_ipsec_tx *tx = ipsec->tx;
	int err;

	mutex_lock(&tx->ft.mutex);
	err = tx_get(mdev, ipsec, tx);
	mutex_unlock(&tx->ft.mutex);
	if (err)
		return ERR_PTR(err);

	return tx;
}

static struct mlx5_flow_table *tx_ft_get_policy(struct mlx5_core_dev *mdev,
                                                struct mlx5e_ipsec *ipsec,
                                                u32 prio)
{
        struct mlx5e_ipsec_tx *tx = ipsec->tx;
        struct mlx5_flow_table *ft;
        int err;

        mutex_lock(&tx->ft.mutex);
        err = tx_get(mdev, ipsec, tx);
        if (err)
            goto err_get;

        ft = tx->chains ? ipsec_chains_get_table(tx->chains, prio) : tx->ft.pol;
        if (IS_ERR(ft)) {
                err = PTR_ERR(ft);
                goto err_get_ft;
        }

        mutex_unlock(&tx->ft.mutex);
        return ft;

err_get_ft:
        tx_put(ipsec, tx);
err_get:
        mutex_unlock(&tx->ft.mutex);
        return ERR_PTR(err);
}

static void tx_ft_put_policy(struct mlx5e_ipsec *ipsec, u32 prio)
{
        struct mlx5e_ipsec_tx *tx = ipsec->tx;

        mutex_lock(&tx->ft.mutex);
        if (tx->chains)
                ipsec_chains_put_table(tx->chains, prio);

        tx_put(ipsec, tx);
        mutex_unlock(&tx->ft.mutex);
}

static void tx_ft_put(struct mlx5e_ipsec *ipsec)
{
	struct mlx5e_ipsec_tx *tx = ipsec->tx;

	mutex_lock(&tx->ft.mutex);
	tx_put(ipsec, tx);
	mutex_unlock(&tx->ft.mutex);
}

static void setup_fte_reg_a_with_tag(struct mlx5_flow_spec *spec,
									 u16 kspi)
{
       /* Add IPsec indicator in metadata_reg_a. */
       spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;

       MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
                        misc_parameters_2.metadata_reg_a);
       MLX5_SET(fte_match_param, spec->match_value,
                misc_parameters_2.metadata_reg_a,
                MLX5_ETH_WQE_FT_META_IPSEC << 23 |  kspi);
}

static void setup_fte_reg_a_no_tag(struct mlx5_flow_spec *spec)
{
       /* Add IPsec indicator in metadata_reg_a. */
       spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;

       MLX5_SET(fte_match_param, spec->match_criteria,
                misc_parameters_2.metadata_reg_a,
				MLX5_ETH_WQE_FT_META_IPSEC << 23);
       MLX5_SET(fte_match_param, spec->match_value,
                misc_parameters_2.metadata_reg_a,
                0);
}

static void setup_fte_reg_c0(struct mlx5_flow_spec *spec, u32 reqid)
{
	/* Pass policy check before choosing this SA */
	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;

	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
			 misc_parameters_2.metadata_reg_c_0);
	MLX5_SET(fte_match_param, spec->match_value,
		 misc_parameters_2.metadata_reg_c_0, reqid);
}

static void setup_fte_upper_proto_match(struct mlx5_flow_spec *spec, struct upspec *upspec)
{
        switch (upspec->proto) {
        case IPPROTO_UDP:
                if (upspec->dport) {
                        MLX5_SET_TO_ONES(fte_match_set_lyr_2_4,
                                         spec->match_criteria, udp_dport);
                        MLX5_SET(fte_match_set_lyr_2_4, spec->match_value,
                                 udp_dport, upspec->dport);
                }

                if (upspec->sport) {
                        MLX5_SET_TO_ONES(fte_match_set_lyr_2_4,
                                         spec->match_criteria, udp_sport);
                        MLX5_SET(fte_match_set_lyr_2_4, spec->match_value,
                                 udp_dport, upspec->sport);
                }
                break;
        case IPPROTO_TCP:
                if (upspec->dport) {
                        MLX5_SET_TO_ONES(fte_match_set_lyr_2_4,
                                         spec->match_criteria, tcp_dport);
                        MLX5_SET(fte_match_set_lyr_2_4, spec->match_value,
                                 tcp_dport, upspec->dport);
                }

                if (upspec->sport) {
                        MLX5_SET_TO_ONES(fte_match_set_lyr_2_4,
                                         spec->match_criteria, tcp_sport);
                        MLX5_SET(fte_match_set_lyr_2_4, spec->match_value,
                                 tcp_dport, upspec->sport);
                }
                break;
        default:
                return;
        }

        spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, spec->match_criteria, ip_protocol);
	MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, ip_protocol, upspec->proto);
}

static int tx_add_kspi_rule(struct mlx5e_ipsec_sa_entry *sa_entry,
							struct mlx5e_ipsec_tx *tx,
							struct mlx5_flow_act *flow_act,
							struct mlx5_flow_destination *dest,
							int num_dest)
{
	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	int err;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	setup_fte_no_frags(spec);
	setup_fte_reg_a_with_tag(spec, sa_entry->kspi);

	if (sa_entry->vid != VLAN_NONE)
		setup_fte_vid(spec, sa_entry->vid);
	else
		setup_fte_no_vid(spec);

	rule = mlx5_add_flow_rules(tx->ft.sa_kspi, spec, flow_act, dest, num_dest);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		mlx5_core_err(mdev, "fail to add TX ipsec kspi rule err=%d\n", err);
		goto err_add_kspi_flow;
	}
	ipsec_rule->kspi_rule = rule;
	kvfree(spec);
	return 0;

err_add_kspi_flow:
	kvfree(spec);
	return err;
}

static int tx_add_reqid_ip_rules(struct mlx5e_ipsec_sa_entry *sa_entry,
								struct mlx5e_ipsec_tx *tx,
								struct mlx5_flow_act *flow_act,
								struct mlx5_flow_destination *dest,
								int num_dest)
{
	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
	struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	int err;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;

	if(attrs->reqid) {
		if (sa_entry->vid != VLAN_NONE)
			setup_fte_vid(spec, sa_entry->vid);
		else
			setup_fte_no_vid(spec);
		setup_fte_no_frags(spec);
		setup_fte_reg_c0(spec, attrs->reqid);
		rule = mlx5_add_flow_rules(tx->ft.sa, spec, flow_act, dest, num_dest);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_err(mdev, "fail to add TX ipsec reqid rule err=%d\n", err);
			goto err_add_reqid_rule;
		}
		ipsec_rule->reqid_rule = rule;
		memset(spec, 0, sizeof(*spec));
	}

	if (sa_entry->vid != VLAN_NONE)
		setup_fte_vid(spec, sa_entry->vid);
	else
		setup_fte_no_vid(spec);

	if (attrs->family == AF_INET)
		setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4);
	else
		setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6);
	setup_fte_no_frags(spec);

	rule = mlx5_add_flow_rules(tx->ft.sa, spec, flow_act, dest, num_dest);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		mlx5_core_err(mdev, "fail to add TX ipsec ip rule err=%d\n", err);
		goto err_add_ip_rule;
	}
	ipsec_rule->rule = rule;
	kvfree(spec);
	return 0;

err_add_ip_rule:
	mlx5_del_flow_rules(&ipsec_rule->reqid_rule);
err_add_reqid_rule:
	kvfree(spec);
	return err;
}

static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
{
	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
	struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
	struct mlx5_flow_destination dest[2] = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5e_ipsec_tx *tx;
	struct mlx5_fc *counter;
	int err;

	tx = tx_ft_get(mdev, ipsec);
	if (IS_ERR(tx))
		return PTR_ERR(tx);

	err = setup_pkt_reformat(mdev, attrs, &flow_act);
	if (err)
		goto err_pkt_reformat;

	counter = mlx5_fc_create(mdev, false);
	if (IS_ERR(counter)) {
		err = PTR_ERR(counter);
		goto err_add_cnt;
	}

	flow_act.crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC;
        flow_act.crypto.obj_id = sa_entry->ipsec_obj_id;
        flow_act.flags |= FLOW_ACT_NO_APPEND;
        flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT |
                           MLX5_FLOW_CONTEXT_ACTION_COUNT;

	if (attrs->drop)
		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
	else
		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;

	dest[0].ft = tx->ft.status;
	dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	dest[1].counter_id = mlx5_fc_id(counter);

	err = tx_add_kspi_rule(sa_entry, tx, &flow_act, dest, 2);
	if (err) {
		goto err_add_kspi_rule;
	}

	err = tx_add_reqid_ip_rules(sa_entry, tx, &flow_act, dest, 2);
	if (err) {
		goto err_add_reqid_ip_rule;
	}

	ipsec_rule->fc = counter;
	ipsec_rule->pkt_reformat = flow_act.pkt_reformat;
	return 0;

err_add_reqid_ip_rule:
	mlx5_del_flow_rules(&ipsec_rule->kspi_rule);
err_add_kspi_rule:
	mlx5_fc_destroy(mdev, counter);
err_add_cnt:
	if (flow_act.pkt_reformat)
		mlx5_packet_reformat_dealloc(mdev, flow_act.pkt_reformat);
err_pkt_reformat:
	tx_ft_put(ipsec);
	return err;
}

static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
{
        struct mlx5_accel_pol_xfrm_attrs *attrs = &pol_entry->attrs;
        struct mlx5_core_dev *mdev = mlx5e_ipsec_pol2dev(pol_entry);
        struct mlx5e_ipsec_tx *tx = pol_entry->ipsec->tx;
        struct mlx5_flow_destination dest[2] = {};
        struct mlx5_flow_act flow_act = {};
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_spec *spec;
        struct mlx5_flow_table *ft;
        int err, dstn = 0;

        ft = tx_ft_get_policy(mdev, pol_entry->ipsec, attrs->prio);
        if (IS_ERR(ft))
            return PTR_ERR(ft);

        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
        if (!spec) {
            err = -ENOMEM;
            goto err_alloc;
        }

        if (attrs->family == AF_INET)
                setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4);
        else
                setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6);

        setup_fte_no_frags(spec);
	setup_fte_upper_proto_match(spec, &attrs->upspec);

        switch (attrs->action) {
        case IPSEC_POLICY_IPSEC:
                flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
                err = setup_modify_header(mdev, attrs->reqid,
                                          IPSEC_DIR_OUTBOUND, &flow_act);
                if (err)
                        goto err_mod_header;
                 break;
        case IPSEC_POLICY_DISCARD:
                flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
                                   MLX5_FLOW_CONTEXT_ACTION_COUNT;
                dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
                dest[dstn].counter_id = mlx5_fc_id(tx->fc->drop);
                dstn++;
                break;
        default:
                err = -EINVAL;
                goto err_mod_header;
        }

        if (attrs->vid != VLAN_NONE)
                setup_fte_vid(spec, attrs->vid);
        else
                setup_fte_no_vid(spec);

        flow_act.flags |= FLOW_ACT_NO_APPEND;
        dest[dstn].ft = tx->ft.sa;
        dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
        dstn++;
        rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, dstn);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
                mlx5_core_err(mdev, "fail to add TX ipsec rule err=%d\n", err);
                goto err_action;
        }

        kvfree(spec);
        pol_entry->ipsec_rule.rule = rule;
        pol_entry->ipsec_rule.modify_hdr = flow_act.modify_hdr;
        return 0;

err_action:
        if (flow_act.modify_hdr)
                mlx5_modify_header_dealloc(mdev, flow_act.modify_hdr);
err_mod_header:
        kvfree(spec);
err_alloc:
        tx_ft_put_policy(pol_entry->ipsec, attrs->prio);
        return err;
}

static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
{
        struct mlx5_accel_pol_xfrm_attrs *attrs = &pol_entry->attrs;
        struct mlx5_core_dev *mdev = mlx5e_ipsec_pol2dev(pol_entry);
	struct mlx5e_ipsec *ipsec = pol_entry->ipsec;
        struct mlx5_flow_destination dest[2];
        struct mlx5_flow_act flow_act = {};
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_spec *spec;
        struct mlx5_flow_table *ft;
        struct mlx5e_ipsec_rx *rx;
	int err, dstn = 0;

        rx = (attrs->family == AF_INET) ? ipsec->rx_ipv4 : ipsec->rx_ipv6;
        ft = rx->chains ? ipsec_chains_get_table(rx->chains, attrs->prio) : rx->ft.pol;
        if (IS_ERR(ft))
                return PTR_ERR(ft);

        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
        if (!spec) {
                err = -ENOMEM;
                goto err_alloc;
        }

        switch (attrs->action) {
        case IPSEC_POLICY_IPSEC:
                flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
                break;
        case IPSEC_POLICY_DISCARD:
                flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT;
                dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
                dest[dstn].counter_id = mlx5_fc_id(rx->fc->drop);
                dstn++;
                break;
        default:
                err = -EINVAL;
                goto err_action;
        }

        flow_act.flags |= FLOW_ACT_NO_APPEND;
        dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
        dest[dstn].ft = rx->ft.sa;
        dstn++;

	if (attrs->family == AF_INET)
		setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4);
	else
		setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6);

	setup_fte_no_frags(spec);
	setup_fte_upper_proto_match(spec, &attrs->upspec);
	if (attrs->vid != VLAN_NONE)
		setup_fte_vid(spec, attrs->vid);
	else
		setup_fte_no_vid(spec);

	rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, dstn);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		mlx5_core_err(mdev,
		    "Failed to add RX IPsec policy rule err=%d\n", err);
		goto err_action;
	}
	pol_entry->ipsec_rule.rule = rule;

	/* Add also rule for zero vid */
	if (attrs->vid == VLAN_NONE) {
		clear_fte_vid(spec);
		setup_fte_vid(spec, 0);
		rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, dstn);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_err(mdev,
			    "Failed to add RX IPsec policy rule err=%d\n",
			    err);
			goto err_action;
		}
		pol_entry->ipsec_rule.vid_zero_rule = rule;
	}

	kvfree(spec);
        return 0;

err_action:
	if (pol_entry->ipsec_rule.rule != NULL)
		mlx5_del_flow_rules(&pol_entry->ipsec_rule.rule);
	kvfree(spec);
err_alloc:
        if (rx->chains != NULL)
                ipsec_chains_put_table(rx->chains, attrs->prio);
        return err;
}

static void ipsec_fs_destroy_counters(struct mlx5e_ipsec *ipsec)
{
	struct mlx5e_ipsec_rx *rx_ipv4 = ipsec->rx_ipv4;
	struct mlx5_core_dev *mdev = ipsec->mdev;
	struct mlx5e_ipsec_tx *tx = ipsec->tx;

	mlx5_fc_destroy(mdev, rx_ipv4->fc->drop);
	mlx5_fc_destroy(mdev, rx_ipv4->fc->cnt);
	kfree(rx_ipv4->fc);
	mlx5_fc_destroy(mdev, tx->fc->drop);
	mlx5_fc_destroy(mdev, tx->fc->cnt);
	kfree(tx->fc);
}

static int ipsec_fs_init_counters(struct mlx5e_ipsec *ipsec)
{
	struct mlx5e_ipsec_rx *rx_ipv4 = ipsec->rx_ipv4;
	struct mlx5e_ipsec_rx *rx_ipv6 = ipsec->rx_ipv6;
	struct mlx5_core_dev *mdev = ipsec->mdev;
	struct mlx5e_ipsec_tx *tx = ipsec->tx;
	struct mlx5e_ipsec_fc *fc;
	struct mlx5_fc *counter;
	int err;

	fc = kzalloc(sizeof(*tx->fc), GFP_KERNEL);
	if (!fc)
		return -ENOMEM;

	tx->fc = fc;
	counter = mlx5_fc_create(mdev, false);
	if (IS_ERR(counter)) {
		err = PTR_ERR(counter);
		goto err_tx_fc_alloc;
	}

	fc->cnt = counter;
	counter = mlx5_fc_create(mdev, false);
	if (IS_ERR(counter)) {
		err = PTR_ERR(counter);
		goto err_tx_fc_cnt;
	}

	fc->drop = counter;

	fc = kzalloc(sizeof(*tx->fc), GFP_KERNEL);
	if (!fc) {
		err = -ENOMEM;
		goto err_tx_fc_drop;
	}

	/* Both IPv4 and IPv6 point to same flow counters struct. */
	rx_ipv4->fc = fc;
	rx_ipv6->fc = fc;
	counter = mlx5_fc_create(mdev, false);
	if (IS_ERR(counter)) {
		err = PTR_ERR(counter);
		goto err_rx_fc_alloc;
	}

	fc->cnt = counter;
	counter = mlx5_fc_create(mdev, false);
	if (IS_ERR(counter)) {
		err = PTR_ERR(counter);
		goto err_rx_fc_cnt;
	}

	fc->drop = counter;
	return 0;

err_rx_fc_cnt:
	mlx5_fc_destroy(mdev, rx_ipv4->fc->cnt);
err_rx_fc_alloc:
	kfree(rx_ipv4->fc);
err_tx_fc_drop:
	mlx5_fc_destroy(mdev, tx->fc->drop);
err_tx_fc_cnt:
	mlx5_fc_destroy(mdev, tx->fc->cnt);
err_tx_fc_alloc:
	kfree(tx->fc);
	return err;
}

static int ipsec_status_rule(struct mlx5_core_dev *mdev,
			     struct mlx5e_ipsec_rx *rx,
			     struct mlx5_flow_destination *dest)
{
	u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_modify_hdr *modify_hdr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	int err;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	/* Action to copy 7 bit ipsec_syndrome to regB[24:30] */
	MLX5_SET(copy_action_in, action, action_type, MLX5_ACTION_TYPE_COPY);
	MLX5_SET(copy_action_in, action, src_field, MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME);
	MLX5_SET(copy_action_in, action, src_offset, 0);
	MLX5_SET(copy_action_in, action, length, 7);
	MLX5_SET(copy_action_in, action, dst_field, MLX5_ACTION_IN_FIELD_METADATA_REG_B);
	MLX5_SET(copy_action_in, action, dst_offset, 24);

	modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL,
					      1, action);

	if (IS_ERR(modify_hdr)) {
		err = PTR_ERR(modify_hdr);
		mlx5_core_err(mdev,
			      "fail to alloc ipsec copy modify_header_id err=%d\n", err);
		goto out_spec;
	}

	/* create fte */
	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
		MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
		MLX5_FLOW_CONTEXT_ACTION_COUNT;
	flow_act.modify_hdr = modify_hdr;

	rule = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		mlx5_core_err(mdev, "fail to add ipsec rx err copy rule err=%d\n", err);
		goto out;
	}

	kvfree(spec);
	rx->status.rule = rule;
	rx->status.modify_hdr = modify_hdr;
	return 0;

out:
	mlx5_modify_header_dealloc(mdev, modify_hdr);
out_spec:
	kvfree(spec);
	return err;
}

static void ipsec_fs_rx_roce_rules_destroy(struct mlx5e_ipsec_rx_roce *rx_roce)
{
	if (!rx_roce->ns_rdma)
		return;

	mlx5_del_flow_rules(&rx_roce->roce_miss.rule);
	mlx5_del_flow_rules(&rx_roce->rule);
	mlx5_destroy_flow_group(rx_roce->roce_miss.group);
	mlx5_destroy_flow_group(rx_roce->g);
}

static void ipsec_fs_rx_catchall_rules_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx)
{
	mutex_lock(&rx->ft.mutex);
	mlx5_del_flow_rules(&rx->sa.rule);
	mlx5_destroy_flow_group(rx->sa.group);
	if (rx->chains == NULL) {
		mlx5_del_flow_rules(&rx->pol.rule);
		mlx5_destroy_flow_group(rx->pol.group);
	}
	mlx5_del_flow_rules(&rx->status.rule);
	mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr);
	ipsec_fs_rx_roce_rules_destroy(&rx->roce);
	mutex_unlock(&rx->ft.mutex);
}

static void ipsec_fs_rx_roce_table_destroy(struct mlx5e_ipsec_rx_roce *rx_roce)
{
	if (!rx_roce->ns_rdma)
		return;

	mlx5_destroy_flow_table(rx_roce->ft_rdma);
	mlx5_destroy_flow_table(rx_roce->ft);
}

static void
ipsec_fs_rx_ip_type_catchall_rule_destroy(struct mlx5e_ipsec_rx_ip_type* rx_ip_type)
{
	mlx5_del_flow_rules(&rx_ip_type->ipv4_rule);
	mlx5_del_flow_rules(&rx_ip_type->ipv6_rule);
	mlx5_del_flow_rules(&rx_ip_type->miss.rule);
	mlx5_destroy_flow_group(rx_ip_type->miss.group);
	rx_ip_type->miss.group = NULL;
}

static void ipsec_fs_rx_table_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx)
{
	if (rx->chains) {
		ipsec_chains_destroy(rx->chains);
	} else {
		mlx5_del_flow_rules(&rx->pol.rule);
		mlx5_destroy_flow_table(rx->ft.pol);
        }
	mlx5_destroy_flow_table(rx->ft.sa);
	mlx5_destroy_flow_table(rx->ft.status);
	ipsec_fs_rx_roce_table_destroy(&rx->roce);
}

static void ipsec_roce_setup_udp_dport(struct mlx5_flow_spec *spec, u16 dport)
{
	spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_UDP);
	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_dport);
	MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, dport);
}

static int ipsec_roce_rx_rule_setup(struct mlx5_flow_destination *default_dst,
				    struct mlx5e_ipsec_rx_roce *roce, struct mlx5_core_dev *mdev)
{
	struct mlx5_flow_destination dst = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	int err = 0;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	ipsec_roce_setup_udp_dport(spec, ROCE_V2_UDP_DPORT);

	//flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;//not needed it is added in command
	dst.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE;
	dst.ft = roce->ft_rdma;

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	rule = mlx5_add_flow_rules(roce->ft, spec, &flow_act, &dst, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		mlx5_core_err(mdev, "Fail to add RX roce ipsec rule err=%d\n",
			      err);
		goto fail_add_rule;
	}

	roce->rule = rule;

	rule = mlx5_add_flow_rules(roce->ft, NULL, &flow_act, default_dst, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		mlx5_core_err(mdev, "Fail to add RX roce ipsec miss rule err=%d\n",
			      err);
		goto fail_add_default_rule;
	}

	roce->roce_miss.rule = rule;

	kvfree(spec);
	return 0;

fail_add_default_rule:
	mlx5_del_flow_rules(&roce->rule);
fail_add_rule:
	kvfree(spec);
	return err;
}

static int ipsec_roce_rx_rules(struct mlx5e_ipsec_rx *rx, struct mlx5_flow_destination *defdst,
			       struct mlx5_core_dev *mdev)
{
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_flow_group *g;
	void *outer_headers_c;
	u32 *in;
	int err = 0;
	int ix = 0;
	u8 *mc;

	if (!rx->roce.ns_rdma)
		return 0;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
	outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers);
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol);
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport);

	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
	MLX5_SET_CFG(in, start_flow_index, ix);
	ix += 1;
	MLX5_SET_CFG(in, end_flow_index, ix - 1);
	g = mlx5_create_flow_group(rx->roce.ft, in);
	if (IS_ERR(g)) {
		err = PTR_ERR(g);
		mlx5_core_err(mdev, "Fail to create ipsec rx roce group at nic err=%d\n", err);
		goto fail_group;
	}
	rx->roce.g = g;

	memset(in, 0, MLX5_ST_SZ_BYTES(create_flow_group_in));
	MLX5_SET_CFG(in, start_flow_index, ix);
	ix += 1;
	MLX5_SET_CFG(in, end_flow_index, ix - 1);
	g = mlx5_create_flow_group(rx->roce.ft, in);
	if (IS_ERR(g)) {
		err = PTR_ERR(g);
		mlx5_core_err(mdev, "Fail to create ipsec rx roce miss group at nic err=%d\n",
			      err);
		goto fail_mgroup;
	}
	rx->roce.roce_miss.group = g;

	err = ipsec_roce_rx_rule_setup(defdst, &rx->roce, mdev);
	if (err)
		goto fail_setup_rule;

	kvfree(in);
	return 0;

fail_setup_rule:
	mlx5_destroy_flow_group(rx->roce.roce_miss.group);
fail_mgroup:
	mlx5_destroy_flow_group(rx->roce.g);
fail_group:
	kvfree(in);
	return err;
}

static int ipsec_fs_rx_catchall_rules(struct mlx5e_priv *priv,
				      struct mlx5e_ipsec_rx *rx,
				      struct mlx5_flow_destination *defdst)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5_flow_destination dest[2] = {};
	int err = 0;

	mutex_lock(&rx->ft.mutex);
	/* IPsec RoCE RX rules */
	err = ipsec_roce_rx_rules(rx, defdst, mdev);
	if (err)
		goto out;

	/* IPsec Rx IP Status table rule */
	dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	if (rx->roce.ft)
		dest[0].ft = rx->roce.ft;
	else
		dest[0].ft = priv->fts.vlan.t;

	dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
        dest[1].counter_id = mlx5_fc_id(rx->fc->cnt);
        err = ipsec_status_rule(mdev, rx, dest);
        if (err)
                goto err_roce_rules_destroy;

	if (!rx->chains) {
		/* IPsec Rx IP policy default miss rule */
		err = ipsec_miss_create(mdev, rx->ft.pol, &rx->pol, defdst);
		if (err)
			goto err_status_rule_destroy;
	}

	/* FIXME: This is workaround to current design
	 * which installs SA on firt packet. So we need to forward this
	 * packet to the stack. It doesn't work with RoCE and eswitch traffic,
	 */
	err = ipsec_miss_create(mdev, rx->ft.sa, &rx->sa, defdst);
	if (err)
		goto err_status_sa_rule_destroy;

	mutex_unlock(&rx->ft.mutex);
	return 0;

err_status_sa_rule_destroy:
	if (!rx->chains) {
		mlx5_del_flow_rules(&rx->pol.rule);
		mlx5_destroy_flow_group(rx->pol.group);
	}
err_status_rule_destroy:
	mlx5_del_flow_rules(&rx->status.rule);
	mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr);
err_roce_rules_destroy:
	ipsec_fs_rx_roce_rules_destroy(&rx->roce);
out:
	mutex_unlock(&rx->ft.mutex);
	return err;
}

static int ipsec_fs_rx_roce_tables_create(struct mlx5e_ipsec_rx *rx,
					  int rx_init_level, int rdma_init_level)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_table *ft;
	int err = 0;

	if (!rx->roce.ns_rdma)
		return 0;

	ft_attr.max_fte = 2;
	ft_attr.level = rx_init_level;
	ft = mlx5_create_flow_table(rx->ns, &ft_attr);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		return err;
	}
	rx->roce.ft = ft;

	ft_attr.max_fte = 0;
	ft_attr.level = rdma_init_level;
	ft = mlx5_create_flow_table(rx->roce.ns_rdma, &ft_attr);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto out;
	}
	rx->roce.ft_rdma = ft;

	return 0;
out:
	mlx5_destroy_flow_table(rx->roce.ft);
	rx->roce.ft = NULL;
	return err;
}

static int
ipsec_fs_rx_ip_type_catchall_rules_create(struct mlx5e_priv *priv,
                                          struct mlx5_flow_destination *defdst)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5e_ipsec *ipsec = priv->ipsec;
	struct mlx5_flow_destination dst = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	int err = 0;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec) {
		return -ENOMEM;
	}
	dst.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;

	/* Set rule for ipv4 packets */
	dst.ft = ipsec->rx_ipv4->ft.pol;
	setup_fte_ip_version(spec, AF_INET);
	rule = mlx5_add_flow_rules(ipsec->rx_ip_type->ft, spec, &flow_act, &dst, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		mlx5_core_err(mdev, "Failed to add ipv4 rule to ip_type table err=%d\n",
			      err);
		goto out;
	}
	ipsec->rx_ip_type->ipv4_rule = rule;

	/* Set rule for ipv6 packets */
	dst.ft = ipsec->rx_ipv6->ft.pol;
	setup_fte_ip_version(spec, AF_INET6);
	rule = mlx5_add_flow_rules(ipsec->rx_ip_type->ft, spec, &flow_act, &dst, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		mlx5_core_err(mdev, "Failed to add ipv6 rule to ip_type table err=%d\n",
			      err);
		goto fail_add_ipv6_rule;
	}
	ipsec->rx_ip_type->ipv6_rule = rule;

	/* set miss rule */
	err = ipsec_miss_create(mdev, ipsec->rx_ip_type->ft, &ipsec->rx_ip_type->miss, defdst);
	if (err) {
		mlx5_core_err(mdev, "Failed to add miss rule to ip_type table err=%d\n",
			          err);
		goto fail_miss_rule;
	}

	goto out;

fail_miss_rule:
	mlx5_del_flow_rules(&ipsec->rx_ip_type->ipv6_rule);
fail_add_ipv6_rule:
	mlx5_del_flow_rules(&ipsec->rx_ip_type->ipv4_rule);
out:
	kvfree(spec);
	return err;
}

static int
ipsec_fs_rx_ip_type_table_create(struct mlx5e_priv *priv,
                                 int level)
{
	struct mlx5e_ipsec *ipsec = priv->ipsec;
	struct mlx5_flow_table *ft;
	int err = 0;

	/* Create rx ip type table */
	ft = ipsec_rx_ft_create(ipsec->rx_ip_type->ns, level, 0, 1);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto out;
	}
	ipsec->rx_ip_type->ft = ft;

	priv->fts.ipsec_ft = priv->ipsec->rx_ip_type->ft;

out:
	return err;
}

static int ipsec_fs_rx_table_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx,
				    int rx_init_level, int rdma_init_level)
{
	struct mlx5_flow_namespace *ns = rx->ns;
	struct mlx5_flow_table *ft;
	int err = 0;

	mutex_lock(&rx->ft.mutex);

	/* IPsec Rx IP SA table create */
	ft = ipsec_rx_ft_create(ns, rx_init_level + 1, 0, 1);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto out;
	}
	rx->ft.sa = ft;

	/* IPsec Rx IP Status table create */
	ft = ipsec_rx_ft_create(ns, rx_init_level + 2, 0, 1);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto err_sa_table_destroy;
	}
	rx->ft.status = ft;

	if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO) {
		rx->chains = ipsec_chains_create(mdev, rx->ft.sa,
				MLX5_FLOW_NAMESPACE_KERNEL, 0,
				rx_init_level, &rx->ft.pol);
		if (IS_ERR(rx->chains)) {
			err = PTR_ERR(rx->chains);
			goto err_status_table_destroy;
		}
	} else {
		ft = ipsec_rx_ft_create(ns, rx_init_level, 0, 1);
		if (IS_ERR(ft)) {
			err = PTR_ERR(ft);
			goto err_status_table_destroy;
		}
		rx->ft.pol = ft;
	}

	/* IPsec RoCE RX tables create*/
	err = ipsec_fs_rx_roce_tables_create(rx, rx_init_level + 3,
					     rdma_init_level);
	if (err)
		goto err_pol_table_destroy;

	goto out;

err_pol_table_destroy:
	mlx5_destroy_flow_table(rx->ft.pol);
err_status_table_destroy:
	mlx5_destroy_flow_table(rx->ft.status);
err_sa_table_destroy:
	mlx5_destroy_flow_table(rx->ft.sa);
out:
	mutex_unlock(&rx->ft.mutex);
	return err;
}

#define NIC_RDMA_BOTH_DIRS_CAPS (MLX5_FT_NIC_RX_2_NIC_RX_RDMA | MLX5_FT_NIC_TX_RDMA_2_NIC_TX)

static void mlx5e_accel_ipsec_fs_init_roce(struct mlx5e_ipsec *ipsec)
{
	struct mlx5_core_dev *mdev = ipsec->mdev;
	struct mlx5_flow_namespace *ns;

	if ((MLX5_CAP_GEN_2(ipsec->mdev, flow_table_type_2_type) &
	      NIC_RDMA_BOTH_DIRS_CAPS) != NIC_RDMA_BOTH_DIRS_CAPS) {
		mlx5_core_dbg(mdev, "Failed to init roce ns, capabilities not supported\n");
		return;
	}

	ns = mlx5_get_flow_namespace(ipsec->mdev, MLX5_FLOW_NAMESPACE_RDMA_RX_IPSEC);
	if (!ns) {
		mlx5_core_err(mdev, "Failed to init roce rx ns\n");
		return;
	}

	ipsec->rx_ipv4->roce.ns_rdma = ns;
	ipsec->rx_ipv6->roce.ns_rdma = ns;

	ns = mlx5_get_flow_namespace(ipsec->mdev, MLX5_FLOW_NAMESPACE_RDMA_TX_IPSEC);
	if (!ns) {
		ipsec->rx_ipv4->roce.ns_rdma = NULL;
		ipsec->rx_ipv6->roce.ns_rdma = NULL;
		mlx5_core_err(mdev, "Failed to init roce tx ns\n");
		return;
	}

	ipsec->tx->roce.ns = ns;
}

int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
{
	if (sa_entry->attrs.dir == IPSEC_DIR_OUTBOUND)
		return tx_add_rule(sa_entry);

	return rx_add_rule(sa_entry);
}

void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
{
	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);

	mlx5_del_flow_rules(&ipsec_rule->rule);
	mlx5_del_flow_rules(&ipsec_rule->kspi_rule);
	if (ipsec_rule->vid_zero_rule != NULL)
		mlx5_del_flow_rules(&ipsec_rule->vid_zero_rule);
	if (ipsec_rule->reqid_rule != NULL)
		mlx5_del_flow_rules(&ipsec_rule->reqid_rule);
	mlx5_fc_destroy(mdev, ipsec_rule->fc);
	mlx5_packet_reformat_dealloc(mdev, ipsec_rule->pkt_reformat);
	if (sa_entry->attrs.dir == IPSEC_DIR_OUTBOUND) {
		tx_ft_put(sa_entry->ipsec);
		return;
	}

	if (ipsec_rule->modify_hdr != NULL)
		mlx5_modify_header_dealloc(mdev, ipsec_rule->modify_hdr);
}

int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry)
{
	if (pol_entry->attrs.dir == IPSEC_DIR_OUTBOUND)
		return tx_add_policy(pol_entry);

	return rx_add_policy(pol_entry);
}

void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry)
{
	struct mlx5e_ipsec_rule *ipsec_rule = &pol_entry->ipsec_rule;
	struct mlx5_core_dev *mdev = mlx5e_ipsec_pol2dev(pol_entry);

	mlx5_del_flow_rules(&ipsec_rule->rule);
	if (ipsec_rule->vid_zero_rule != NULL)
		mlx5_del_flow_rules(&ipsec_rule->vid_zero_rule);

	if (pol_entry->attrs.dir == IPSEC_DIR_INBOUND) {
		struct mlx5e_ipsec_rx *rx;

                rx = (pol_entry->attrs.family == AF_INET)
                         ? pol_entry->ipsec->rx_ipv4
                         : pol_entry->ipsec->rx_ipv6;
                if (rx->chains)
                        ipsec_chains_put_table(rx->chains,
                                               pol_entry->attrs.prio);
                return;
	}

	if (ipsec_rule->modify_hdr)
		mlx5_modify_header_dealloc(mdev, ipsec_rule->modify_hdr);

	tx_ft_put_policy(pol_entry->ipsec, pol_entry->attrs.prio);
}

void mlx5e_accel_ipsec_fs_rx_catchall_rules_destroy(struct mlx5e_priv *priv)
{
	/* Check if IPsec supported */
	if (!priv->ipsec)
		return;

	ipsec_fs_rx_ip_type_catchall_rule_destroy(priv->ipsec->rx_ip_type);
	ipsec_fs_rx_catchall_rules_destroy(priv->mdev, priv->ipsec->rx_ipv4);
	ipsec_fs_rx_catchall_rules_destroy(priv->mdev, priv->ipsec->rx_ipv6);
}

int mlx5e_accel_ipsec_fs_rx_catchall_rules(struct mlx5e_priv *priv)
{
	struct mlx5e_ipsec *ipsec = priv->ipsec;
	struct mlx5_flow_destination dest = {};
	int err = 0;

	/* Check if IPsec supported */
	if (!ipsec)
		return 0;

	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	dest.ft = priv->fts.vlan.t;
	err = ipsec_fs_rx_catchall_rules(priv, ipsec->rx_ipv6, &dest);
	if (err)
		goto out;

	err = ipsec_fs_rx_catchall_rules(priv, ipsec->rx_ipv4, &dest);
	if (err)
		ipsec_fs_rx_catchall_rules_destroy(priv->mdev, priv->ipsec->rx_ipv6);

	err = ipsec_fs_rx_ip_type_catchall_rules_create(priv, &dest);
	if (err) {
		ipsec_fs_rx_catchall_rules_destroy(priv->mdev, priv->ipsec->rx_ipv6);
		ipsec_fs_rx_catchall_rules_destroy(priv->mdev, priv->ipsec->rx_ipv4);
	}

out:
	return err;
}

void mlx5e_accel_ipsec_fs_rx_tables_destroy(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5e_ipsec *ipsec = priv->ipsec;

	/* Check if IPsec supported */
	if (!ipsec)
		return;

	mlx5_destroy_flow_table(ipsec->rx_ip_type->ft);
	ipsec_fs_rx_table_destroy(mdev, ipsec->rx_ipv6);
	ipsec_fs_rx_table_destroy(mdev, ipsec->rx_ipv4);
}

int mlx5e_accel_ipsec_fs_rx_tables_create(struct mlx5e_priv *priv)
{
	struct mlx5e_ipsec *ipsec = priv->ipsec;
	int err = 0;

	/* Check if IPsec supported */
	if (!ipsec)
		return 0;

	err = ipsec_fs_rx_ip_type_table_create(priv, 0);
	if (err)
		return err;

	err = ipsec_fs_rx_table_create(ipsec->mdev, ipsec->rx_ipv4, 1, 0);
	if (err)
		goto err_ipv4_table;

	err = ipsec_fs_rx_table_create(ipsec->mdev, ipsec->rx_ipv6, 5, 1);
	if (err)
		goto err_ipv6_table;

	return 0;

err_ipv6_table:
	ipsec_fs_rx_table_destroy(priv->mdev, ipsec->rx_ipv4);
err_ipv4_table:
	mlx5_destroy_flow_table(ipsec->rx_ip_type->ft);
	return err;
}

void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec)
{
	WARN_ON(ipsec->tx->ft.refcnt);
	mutex_destroy(&ipsec->rx_ipv6->ft.mutex);
	mutex_destroy(&ipsec->rx_ipv4->ft.mutex);
	mutex_destroy(&ipsec->tx->ft.mutex);
	ipsec_fs_destroy_counters(ipsec);
	kfree(ipsec->rx_ip_type);
	kfree(ipsec->rx_ipv6);
	kfree(ipsec->rx_ipv4);
	kfree(ipsec->tx);
}

int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec)
{
	struct mlx5_flow_namespace *tns, *rns;
	int err = -ENOMEM;

	tns = mlx5_get_flow_namespace(ipsec->mdev, MLX5_FLOW_NAMESPACE_EGRESS_IPSEC);
	if (!tns)
		return -EOPNOTSUPP;

	rns = mlx5_get_flow_namespace(ipsec->mdev, MLX5_FLOW_NAMESPACE_KERNEL);
	if (!rns)
		return -EOPNOTSUPP;

	ipsec->tx = kzalloc(sizeof(*ipsec->tx), GFP_KERNEL);
	if (!ipsec->tx)
		return -ENOMEM;

	ipsec->rx_ip_type = kzalloc(sizeof(*ipsec->rx_ip_type), GFP_KERNEL);
	if (!ipsec->rx_ip_type)
		goto err_tx;

	ipsec->rx_ipv4 = kzalloc(sizeof(*ipsec->rx_ipv4), GFP_KERNEL);
	if (!ipsec->rx_ipv4)
		goto err_ip_type;

	ipsec->rx_ipv6 = kzalloc(sizeof(*ipsec->rx_ipv6), GFP_KERNEL);
	if (!ipsec->rx_ipv6)
		goto err_rx_ipv4;

	err = ipsec_fs_init_counters(ipsec);
	if (err)
		goto err_rx_ipv6;

	ipsec->tx->ns = tns;
	mutex_init(&ipsec->tx->ft.mutex);
	ipsec->rx_ip_type->ns = rns;
	ipsec->rx_ipv4->ns = rns;
	ipsec->rx_ipv6->ns = rns;
	mutex_init(&ipsec->rx_ipv4->ft.mutex);
	mutex_init(&ipsec->rx_ipv6->ft.mutex);

	mlx5e_accel_ipsec_fs_init_roce(ipsec);

	return 0;

err_rx_ipv6:
	kfree(ipsec->rx_ipv6);
err_rx_ipv4:
	kfree(ipsec->rx_ipv4);
err_ip_type:
	kfree(ipsec->rx_ip_type);
err_tx:
	kfree(ipsec->tx);
	return err;
}

void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry)
{
        struct mlx5e_ipsec_sa_entry sa_entry_shadow = {};
        int err;

        memcpy(&sa_entry_shadow, sa_entry, sizeof(*sa_entry));
        memset(&sa_entry_shadow.ipsec_rule, 0x00, sizeof(sa_entry->ipsec_rule));

        err = mlx5e_accel_ipsec_fs_add_rule(&sa_entry_shadow);
        if (err)
                return;
        mlx5e_accel_ipsec_fs_del_rule(sa_entry);
        memcpy(sa_entry, &sa_entry_shadow, sizeof(*sa_entry));
}
