Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [PATCH net 1/3] net: sched: ife: signal not finding metaid
From: Jamal Hadi Salim @ 2018-04-19 12:08 UTC (permalink / raw)
  To: yotam gigi, Alexander Aring
  Cc: davem, Cong Wang, Jiří Pírko, Yuval Mintz, netdev,
	kernel
In-Reply-To: <CANnrxJjLvzoDiMWmv0Ad-O44N-Vc=8Jjm2KrEKxiLt9a9fcNmA@mail.gmail.com>

On 19/04/18 01:37 AM, yotam gigi wrote:
> On Thu, Apr 19, 2018 at 12:35 AM, Alexander Aring <aring@mojatatu.com> wrote:
>> We need to record stats for received metadata that we dont know how
>> to process. Have find_decode_metaid() return -ENOENT to capture this.
> 
> Agree.
> 
>>
>> Signed-off-by: Alexander Aring <aring@mojatatu.com>
> 
> Reviewed-by: Yotam Gigi <yotam.gi@gmail.com>

Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>

^ permalink raw reply

* Re: [PATCH] net: phy: TLK10X initial driver submission
From: Andrew Lunn @ 2018-04-19 12:09 UTC (permalink / raw)
  To: Måns Andersson
  Cc: Rob Herring, Mark Rutland, Florian Fainelli, netdev, devicetree,
	linux-kernel
In-Reply-To: <20180419082816.109338-1-mans.andersson@nibe.se>

On Thu, Apr 19, 2018 at 10:28:16AM +0200, Måns Andersson wrote:
> From: Mans Andersson <mans.andersson@nibe.se>
> 
> Add suport for the TI TLK105 and TLK106 10/100Mbit ethernet phys.
> 
> In addition the TLK10X needs to be removed from DP83848 driver as the
> power back off support is added here for this device.
> 
> Datasheet:
> http://www.ti.com/lit/gpn/tlk106
> ---
>  .../devicetree/bindings/net/ti,tlk10x.txt          |  27 +++
>  drivers/net/phy/Kconfig                            |   5 +
>  drivers/net/phy/Makefile                           |   1 +
>  drivers/net/phy/dp83848.c                          |   3 -
>  drivers/net/phy/tlk10x.c                           | 209 +++++++++++++++++++++
>  5 files changed, 242 insertions(+), 3 deletions(-)
>  create mode 100644 Documentation/devicetree/bindings/net/ti,tlk10x.txt
>  create mode 100644 drivers/net/phy/tlk10x.c
> 
> diff --git a/Documentation/devicetree/bindings/net/ti,tlk10x.txt b/Documentation/devicetree/bindings/net/ti,tlk10x.txt
> new file mode 100644
> index 0000000..371d0d7
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/net/ti,tlk10x.txt
> @@ -0,0 +1,27 @@
> +* Texas Instruments - TLK105 / TLK106 ethernet PHYs
> +
> +Required properties:
> +	- reg - The ID number for the phy, usually a small integer
> +
> +Optional properties:
> +	- ti,power-back-off - Power Back Off Level
> +		Please refer to data sheet chapter 8.6 and TI Application
> +		Note SLLA3228
> +		0 - Normal Operation
> +		1 - Level 1 (up to 140m cable between TLK link partners)
> +		2 - Level 2 (up to 100m cable between TLK link partners)
> +		3 - Level 3 (up to 80m cable between TLK link partners)

Hi Måns

Device tree is all about board properties. In most cases, power back
off is not a board property, since it depends on the cable length
and the peer board. If, however, your board has two PHYs back to back,
say to connect to an Ethernet switch, that would be a valid board
property.

How are you using this?

I know of others who would like such a configuration. Marvell PHYs can
do something similar. I've always suggested adding a PHY tunable. Pass
the cable length in meters and let the PHY driver pick the nearest it
can do, rounding up. The Marvell PHYs also support measuring the cable
length as part of the cable diagnostics. So it would be good to
reserve a configuration value to mean 'auto' - measure the cable and
then pick the best power back off. Quickly scanning the data sheet, I
see that this PHY also has the ability to measure the cable length.

> +static int tlk10x_read(struct phy_device *phydev, int reg)
> +{
> +	if (reg & ~0x1f) {
> +		/* Extended register */
> +		phy_write(phydev, TLK10X_REGCR, 0x001F);
> +		phy_write(phydev, TLK10X_ADDAR, reg);
> +		phy_write(phydev, TLK10X_REGCR, 0x401F);
> +		reg = TLK10X_ADDAR;
> +	}
> +
> +	return phy_read(phydev, reg);
> +}
> +
> +static int tlk10x_write(struct phy_device *phydev, int reg, int val)
> +{
> +	if (reg & ~0x1f) {
> +		/* Extended register */
> +		phy_write(phydev, TLK10X_REGCR, 0x001F);
> +		phy_write(phydev, TLK10X_ADDAR, reg);
> +		phy_write(phydev, TLK10X_REGCR, 0x401F);
> +		reg = TLK10X_ADDAR;
> +	}
> +
> +	return phy_write(phydev, reg, val);
> +}

This looks to be phy_read_mmd() and phy_write_mmd(). If so, please use
them, they get the locking correct.


> +#ifdef CONFIG_OF_MDIO
> +static int tlk10x_of_init(struct phy_device *phydev)
> +{
> +	struct tlk10x_private *tlk10x = phydev->priv;
> +	struct device *dev = &phydev->mdio.dev;
> +	struct device_node *of_node = dev->of_node;
> +	int ret;
> +
> +	if (!of_node)
> +		return 0;
> +
> +	ret = of_property_read_u32(of_node, "ti,power-back-off",
> +				   &tlk10x->pwrbo_level);
> +	if (ret) {
> +		dev_err(dev, "missing ti,power-back-off property");
> +		tlk10x->pwrbo_level = 0;
> +	}

If we decide to accept this, you should do range checking, and return
-EINVAL if the value is out of range.

> +static int tlk10x_config_init(struct phy_device *phydev)
> +{
> +	int ret, reg;
> +	struct tlk10x_private *tlk10x;
> +
> +	ret = genphy_config_init(phydev);
> +	if (ret < 0)
> +		return ret;
> +
> +	if (!phydev->priv) {
> +		tlk10x = devm_kzalloc(&phydev->mdio.dev, sizeof(*tlk10x),
> +				      GFP_KERNEL);
> +		if (!tlk10x)
> +			return -ENOMEM;
> +
> +		phydev->priv = tlk10x;
> +		ret = tlk10x_of_init(phydev);
> +		if (ret)
> +			return ret;
> +	} else {
> +		tlk10x = (struct tlk10x_private *)phydev->priv;
> +	}

This allocation should be done in .probe

> +
> +	// Power back off
> +	if (tlk10x->pwrbo_level < 0 || tlk10x->pwrbo_level > 3)
> +		tlk10x->pwrbo_level = 0;
> +	reg = tlk10x_read(phydev, TLK10X_PWRBOCR);
> +	reg = ((reg & ~TLK10X_PWRBOCR_MASK)
> +		| (tlk10x->pwrbo_level << 6));
> +	ret = tlk10x_write(phydev, TLK10X_PWRBOCR, reg);
> +	if (ret < 0) {
> +		dev_err(&phydev->mdio.dev,
> +			"unable to set power back-off (err=%d)\n", ret);
> +		return ret;
> +	}
> +	dev_info(&phydev->mdio.dev, "power back-off set to level %d\n",
> +		 tlk10x->pwrbo_level);
> +
> +	return 0;
> +}

  Andrew

^ permalink raw reply

* Re: [PATCH net 2/3] net: sched: ife: handle malformed tlv length
From: Jamal Hadi Salim @ 2018-04-19 12:09 UTC (permalink / raw)
  To: yotam gigi, Alexander Aring
  Cc: davem, Cong Wang, Jiří Pírko, Yuval Mintz, netdev,
	kernel
In-Reply-To: <CANnrxJidq70VAmDza63cEkpd80c=VCxn6hg=m4Ko5oXYML82Ag@mail.gmail.com>

On 19/04/18 01:37 AM, yotam gigi wrote:
> On Thu, Apr 19, 2018 at 12:35 AM, Alexander Aring <aring@mojatatu.com> wrote:
>> There is currently no handling to check on a invalid tlv length. This
>> patch adds such handling to avoid killing the kernel with a malformed
>> ife packet.
> 
> That's very important. Thanks for that!
> 
>>
>> Signed-off-by: Alexander Aring <aring@mojatatu.com>

Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>

cheers,
jamal

^ permalink raw reply

* Re: [PATCH net 3/3] net: sched: ife: check on metadata length
From: Jamal Hadi Salim @ 2018-04-19 12:10 UTC (permalink / raw)
  To: yotam gigi, Alexander Aring
  Cc: davem, Cong Wang, Jiří Pírko, Yuval Mintz, netdev,
	kernel
In-Reply-To: <CANnrxJhUdk6s9_oRRyV+iC7Q_NzAFk5b9=FW5oGtuOuiFdHFvg@mail.gmail.com>

On 19/04/18 01:37 AM, yotam gigi wrote:
> On Thu, Apr 19, 2018 at 12:35 AM, Alexander Aring <aring@mojatatu.com> wrote:
>> This patch checks if sk buffer is available to dererence ife header. If
>> not then NULL will returned to signal an malformed ife packet. This
>> avoids to crashing the kernel from outside.
>>
>> Signed-off-by: Alexander Aring <aring@mojatatu.com>
> 
> Reviewed-by: Yotam Gigi <yotam.gi@gmail.com>
>

Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>

cheers,
jamal

^ permalink raw reply

* Re: [RFC PATCH ghak32 V2 04/13] audit: add containerid filtering
From: Richard Guy Briggs @ 2018-04-19 12:17 UTC (permalink / raw)
  To: Paul Moore
  Cc: cgroups, containers, linux-api, Linux-Audit Mailing List,
	linux-fsdevel, LKML, netdev, ebiederm, luto, jlayton, carlos,
	dhowells, viro, simo, Eric Paris, serge
In-Reply-To: <CAHC9VhRVGTCVJxG3Etcs-aOpr71A7xGsn5VPhskUG35rmQ7WUw@mail.gmail.com>

On 2018-04-18 20:24, Paul Moore wrote:
> On Fri, Mar 16, 2018 at 5:00 AM, Richard Guy Briggs <rgb@redhat.com> wrote:
> > Implement container ID filtering using the AUDIT_CONTAINERID field name
> > to send an 8-character string representing a u64 since the value field
> > is only u32.
> >
> > Sending it as two u32 was considered, but gathering and comparing two
> > fields was more complex.
> 
> My only worry here is that you aren't really sending a string in the
> ASCII sense, you are sending an 8 byte buffer (that better be NUL
> terminated) that happens to be an unsigned 64-bit integer.  To be
> clear, I'm okay with that (it's protected by AUDIT_CONTAINERID), and
> the code is okay with that, I just want us to pause for a minute and
> make sure that is an okay thing to do long term.

I already went through that process and warned of it 7 weeks ago.  As
already noted, that was preferable to two separate u32 fields that
depend on each other, making comparisons more complicated.  Using two
separate fields to configure the rule could be gated for validity, then
the result stored in a special rule field, but I wasn't keen on that
approach.

> > The feature indicator is AUDIT_FEATURE_BITMAP_CONTAINERID_FILTER.
> >
> > This requires support from userspace to be useful.
> > See: https://github.com/linux-audit/audit-userspace/issues/40
> > Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
> > ---
> >  include/linux/audit.h      |  1 +
> >  include/uapi/linux/audit.h |  5 ++++-
> >  kernel/audit.h             |  1 +
> >  kernel/auditfilter.c       | 47 ++++++++++++++++++++++++++++++++++++++++++++++
> >  kernel/auditsc.c           |  3 +++
> >  5 files changed, 56 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/linux/audit.h b/include/linux/audit.h
> > index 3acbe9d..f10ca1b 100644
> > --- a/include/linux/audit.h
> > +++ b/include/linux/audit.h
> > @@ -76,6 +76,7 @@ struct audit_field {
> >         u32                             type;
> >         union {
> >                 u32                     val;
> > +               u64                     val64;
> >                 kuid_t                  uid;
> >                 kgid_t                  gid;
> >                 struct {
> > diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
> > index e83ccbd..8443a8f 100644
> > --- a/include/uapi/linux/audit.h
> > +++ b/include/uapi/linux/audit.h
> > @@ -262,6 +262,7 @@
> >  #define AUDIT_LOGINUID_SET     24
> >  #define AUDIT_SESSIONID        25      /* Session ID */
> >  #define AUDIT_FSTYPE   26      /* FileSystem Type */
> > +#define AUDIT_CONTAINERID      27      /* Container ID */
> >
> >                                 /* These are ONLY useful when checking
> >                                  * at syscall exit time (AUDIT_AT_EXIT). */
> > @@ -342,6 +343,7 @@ enum {
> >  #define AUDIT_FEATURE_BITMAP_SESSIONID_FILTER  0x00000010
> >  #define AUDIT_FEATURE_BITMAP_LOST_RESET                0x00000020
> >  #define AUDIT_FEATURE_BITMAP_FILTER_FS         0x00000040
> > +#define AUDIT_FEATURE_BITMAP_CONTAINERID_FILTER        0x00000080
> >
> >  #define AUDIT_FEATURE_BITMAP_ALL (AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT | \
> >                                   AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME | \
> > @@ -349,7 +351,8 @@ enum {
> >                                   AUDIT_FEATURE_BITMAP_EXCLUDE_EXTEND | \
> >                                   AUDIT_FEATURE_BITMAP_SESSIONID_FILTER | \
> >                                   AUDIT_FEATURE_BITMAP_LOST_RESET | \
> > -                                 AUDIT_FEATURE_BITMAP_FILTER_FS)
> > +                                 AUDIT_FEATURE_BITMAP_FILTER_FS | \
> > +                                 AUDIT_FEATURE_BITMAP_CONTAINERID_FILTER)
> >
> >  /* deprecated: AUDIT_VERSION_* */
> >  #define AUDIT_VERSION_LATEST           AUDIT_FEATURE_BITMAP_ALL
> > diff --git a/kernel/audit.h b/kernel/audit.h
> > index 214e149..aaa651a 100644
> > --- a/kernel/audit.h
> > +++ b/kernel/audit.h
> > @@ -234,6 +234,7 @@ static inline int audit_hash_ino(u32 ino)
> >
> >  extern int audit_match_class(int class, unsigned syscall);
> >  extern int audit_comparator(const u32 left, const u32 op, const u32 right);
> > +extern int audit_comparator64(const u64 left, const u32 op, const u64 right);
> >  extern int audit_uid_comparator(kuid_t left, u32 op, kuid_t right);
> >  extern int audit_gid_comparator(kgid_t left, u32 op, kgid_t right);
> >  extern int parent_len(const char *path);
> > diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
> > index d7a807e..c4c8746 100644
> > --- a/kernel/auditfilter.c
> > +++ b/kernel/auditfilter.c
> > @@ -410,6 +410,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
> >         /* FALL THROUGH */
> >         case AUDIT_ARCH:
> >         case AUDIT_FSTYPE:
> > +       case AUDIT_CONTAINERID:
> >                 if (f->op != Audit_not_equal && f->op != Audit_equal)
> >                         return -EINVAL;
> >                 break;
> > @@ -584,6 +585,14 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
> >                         }
> >                         entry->rule.exe = audit_mark;
> >                         break;
> > +               case AUDIT_CONTAINERID:
> > +                       if (f->val != sizeof(u64))
> > +                               goto exit_free;
> > +                       str = audit_unpack_string(&bufp, &remain, f->val);
> > +                       if (IS_ERR(str))
> > +                               goto exit_free;
> > +                       f->val64 = ((u64 *)str)[0];
> > +                       break;
> >                 }
> >         }
> >
> > @@ -666,6 +675,11 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
> >                         data->buflen += data->values[i] =
> >                                 audit_pack_string(&bufp, audit_mark_path(krule->exe));
> >                         break;
> > +               case AUDIT_CONTAINERID:
> > +                       data->buflen += data->values[i] = sizeof(u64);
> > +                       for (i = 0; i < sizeof(u64); i++)
> > +                               ((char *)bufp)[i] = ((char *)&f->val64)[i];
> > +                       break;
> >                 case AUDIT_LOGINUID_SET:
> >                         if (krule->pflags & AUDIT_LOGINUID_LEGACY && !f->val) {
> >                                 data->fields[i] = AUDIT_LOGINUID;
> > @@ -752,6 +766,10 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
> >                         if (!gid_eq(a->fields[i].gid, b->fields[i].gid))
> >                                 return 1;
> >                         break;
> > +               case AUDIT_CONTAINERID:
> > +                       if (a->fields[i].val64 != b->fields[i].val64)
> > +                               return 1;
> > +                       break;
> >                 default:
> >                         if (a->fields[i].val != b->fields[i].val)
> >                                 return 1;
> > @@ -1210,6 +1228,31 @@ int audit_comparator(u32 left, u32 op, u32 right)
> >         }
> >  }
> >
> > +int audit_comparator64(u64 left, u32 op, u64 right)
> > +{
> > +       switch (op) {
> > +       case Audit_equal:
> > +               return (left == right);
> > +       case Audit_not_equal:
> > +               return (left != right);
> > +       case Audit_lt:
> > +               return (left < right);
> > +       case Audit_le:
> > +               return (left <= right);
> > +       case Audit_gt:
> > +               return (left > right);
> > +       case Audit_ge:
> > +               return (left >= right);
> > +       case Audit_bitmask:
> > +               return (left & right);
> > +       case Audit_bittest:
> > +               return ((left & right) == right);
> > +       default:
> > +               BUG();
> > +               return 0;
> > +       }
> > +}
> > +
> >  int audit_uid_comparator(kuid_t left, u32 op, kuid_t right)
> >  {
> >         switch (op) {
> > @@ -1348,6 +1391,10 @@ int audit_filter(int msgtype, unsigned int listtype)
> >                                 result = audit_comparator(audit_loginuid_set(current),
> >                                                           f->op, f->val);
> >                                 break;
> > +                       case AUDIT_CONTAINERID:
> > +                               result = audit_comparator64(audit_get_containerid(current),
> > +                                                             f->op, f->val64);
> > +                               break;
> >                         case AUDIT_MSGTYPE:
> >                                 result = audit_comparator(msgtype, f->op, f->val);
> >                                 break;
> > diff --git a/kernel/auditsc.c b/kernel/auditsc.c
> > index 65be110..2bba324 100644
> > --- a/kernel/auditsc.c
> > +++ b/kernel/auditsc.c
> > @@ -614,6 +614,9 @@ static int audit_filter_rules(struct task_struct *tsk,
> >                 case AUDIT_LOGINUID_SET:
> >                         result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val);
> >                         break;
> > +               case AUDIT_CONTAINERID:
> > +                       result = audit_comparator64(audit_get_containerid(tsk), f->op, f->val64);
> > +                       break;
> >                 case AUDIT_SUBJ_USER:
> >                 case AUDIT_SUBJ_ROLE:
> >                 case AUDIT_SUBJ_TYPE:
> > --
> > 1.8.3.1
> >
> > --
> > Linux-audit mailing list
> > Linux-audit@redhat.com
> > https://www.redhat.com/mailman/listinfo/linux-audit
> 
> 
> 
> -- 
> paul moore
> www.paul-moore.com

- RGB

--
Richard Guy Briggs <rgb@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635

^ permalink raw reply

* Re: [PATCH] net: phy: marvell: clear wol event before setting it
From: Andrew Lunn @ 2018-04-19 12:18 UTC (permalink / raw)
  To: Jisheng Zhang
  Cc: Florian Fainelli, David S. Miller, netdev, linux-kernel,
	Jingju Hou
In-Reply-To: <20180419160232.519d15be@xhacker.debian>

On Thu, Apr 19, 2018 at 04:02:32PM +0800, Jisheng Zhang wrote:
> From: Jingju Hou <Jingju.Hou@synaptics.com>
> 
> If WOL event happened once, the LED[2] interrupt pin will not be
> cleared unless reading the CSISR register. So clear the WOL event
> before enabling it.
> 
> Signed-off-by: Jingju Hou <Jingju.Hou@synaptics.com>
> Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
> ---
>  drivers/net/phy/marvell.c | 9 +++++++++
>  1 file changed, 9 insertions(+)
> 
> diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
> index c22e8e383247..b6abe1cbc84b 100644
> --- a/drivers/net/phy/marvell.c
> +++ b/drivers/net/phy/marvell.c
> @@ -115,6 +115,9 @@
>  /* WOL Event Interrupt Enable */
>  #define MII_88E1318S_PHY_CSIER_WOL_EIE			BIT(7)
>  
> +/* Copper Specific Interrupt Status Register */
> +#define MII_88E1318S_PHY_CSISR				0x13
> +
>  /* LED Timer Control Register */
>  #define MII_88E1318S_PHY_LED_TCR			0x12
>  #define MII_88E1318S_PHY_LED_TCR_FORCE_INT		BIT(15)
> @@ -1393,6 +1396,12 @@ static int m88e1318_set_wol(struct phy_device *phydev,
>  		if (err < 0)
>  			goto error;
>  
> +		/* If WOL event happened once, the LED[2] interrupt pin
> +		 * will not be cleared unless reading the CSISR register.
> +		 * So clear the WOL event first before enabling it.
> +		 */
> +		phy_read(phydev, MII_88E1318S_PHY_CSISR);
> +

Hi Jisheng

The problem with this is, you could be clearing a real interrupt, link
down/up etc. If interrupts are in use, I think the normal interrupt
handling will clear the WOL interrupt? So can you make this read
conditional on !phy_interrupt_is_valid()?

	Andrew

^ permalink raw reply

* Re: [PATCH v4 1/9] net-next: phy: new Asix Electronics PHY driver
From: Andrew Lunn @ 2018-04-19 12:21 UTC (permalink / raw)
  To: Michael Schmitz
  Cc: netdev, fthain, geert, f.fainelli, linux-m68k, Michael.Karcher
In-Reply-To: <1524103526-12240-2-git-send-email-schmitzmic@gmail.com>

On Thu, Apr 19, 2018 at 02:05:18PM +1200, Michael Schmitz wrote:
> The Asix Electronics PHY found on the X-Surf 100 Amiga Zorro network
> card by Individual Computers is buggy, and needs the reset bit toggled
> as workaround to make a PHY soft reset succeed.
> 
> Add workaround driver just for this special case.
> 
> Suggested in xsurf100 patch series review by Andrew Lunn <andrew@lunn.ch>
> 
> Signed-off-by: Michael Schmitz <schmitzmic@gmail.com>

Reviewed-by: Andrew Lunn <andrew@lunn.ch>

    Andrew

^ permalink raw reply

* Re: [PATCH] net: phy: TLK10X initial driver submission
From: Miguel Ojeda @ 2018-04-19 12:24 UTC (permalink / raw)
  To: Måns Andersson
  Cc: Rob Herring, Mark Rutland, Andrew Lunn, Florian Fainelli,
	Network Development, devicetree, linux-kernel
In-Reply-To: <20180419082816.109338-1-mans.andersson@nibe.se>

On Thu, Apr 19, 2018 at 10:28 AM, Måns Andersson <mans.andersson@nibe.se> wrote:
> From: Mans Andersson <mans.andersson@nibe.se>
>
> Add suport for the TI TLK105 and TLK106 10/100Mbit ethernet phys.
>

Hi Mans,

Some quick notes.

> In addition the TLK10X needs to be removed from DP83848 driver as the
> power back off support is added here for this device.
>
> Datasheet:
> http://www.ti.com/lit/gpn/tlk106

Missing signature.

> ---
>  .../devicetree/bindings/net/ti,tlk10x.txt          |  27 +++
>  drivers/net/phy/Kconfig                            |   5 +
>  drivers/net/phy/Makefile                           |   1 +
>  drivers/net/phy/dp83848.c                          |   3 -
>  drivers/net/phy/tlk10x.c                           | 209 +++++++++++++++++++++
>  5 files changed, 242 insertions(+), 3 deletions(-)
>  create mode 100644 Documentation/devicetree/bindings/net/ti,tlk10x.txt
>  create mode 100644 drivers/net/phy/tlk10x.c
>
> diff --git a/Documentation/devicetree/bindings/net/ti,tlk10x.txt b/Documentation/devicetree/bindings/net/ti,tlk10x.txt
> new file mode 100644
> index 0000000..371d0d7
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/net/ti,tlk10x.txt
> @@ -0,0 +1,27 @@
> +* Texas Instruments - TLK105 / TLK106 ethernet PHYs
> +
> +Required properties:
> +       - reg - The ID number for the phy, usually a small integer
> +
> +Optional properties:
> +       - ti,power-back-off - Power Back Off Level
> +               Please refer to data sheet chapter 8.6 and TI Application
> +               Note SLLA3228
> +               0 - Normal Operation
> +               1 - Level 1 (up to 140m cable between TLK link partners)
> +               2 - Level 2 (up to 100m cable between TLK link partners)
> +               3 - Level 3 (up to 80m cable between TLK link partners)
> +
> +Default child nodes are standard Ethernet PHY device
> +nodes as described in Documentation/devicetree/bindings/net/phy.txt
> +
> +Example:
> +
> +       ethernet-phy@0 {
> +               reg = <0>;
> +               ti,power-back-off = <2>;
> +       };
> +
> +Datasheets and documentation can be found at:
> +http://www.ti.com/lit/gpn/tlk106
> +http://www.ti.com/lit/an/slla328/slla328.pdf
> diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
> index bdfbabb..c980240 100644
> --- a/drivers/net/phy/Kconfig
> +++ b/drivers/net/phy/Kconfig
> @@ -295,6 +295,11 @@ config DP83867_PHY
>         ---help---
>           Currently supports the DP83867 PHY.
>
> +config TLK10X_PHY
> +       tristate "Texas Instruments TLK10x PHY"
> +       ---help---
> +         Supports the TLK105 and TLK106 PHYs.
> +
>  config FIXED_PHY
>         tristate "MDIO Bus/PHY emulation with fixed speed/link PHYs"
>         depends on PHYLIB
> diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
> index 01acbcb..37e4e02 100644
> --- a/drivers/net/phy/Makefile
> +++ b/drivers/net/phy/Makefile
> @@ -79,5 +79,6 @@ obj-$(CONFIG_ROCKCHIP_PHY)    += rockchip.o
>  obj-$(CONFIG_SMSC_PHY)         += smsc.o
>  obj-$(CONFIG_STE10XP)          += ste10Xp.o
>  obj-$(CONFIG_TERANETICS_PHY)   += teranetics.o
> +obj-$(CONFIG_TLK10X_PHY)       += tlk10x.o
>  obj-$(CONFIG_VITESSE_PHY)      += vitesse.o
>  obj-$(CONFIG_XILINX_GMII2RGMII) += xilinx_gmii2rgmii.o
> diff --git a/drivers/net/phy/dp83848.c b/drivers/net/phy/dp83848.c
> index cd09c3a..435f401 100644
> --- a/drivers/net/phy/dp83848.c
> +++ b/drivers/net/phy/dp83848.c
> @@ -19,7 +19,6 @@
>  #define TI_DP83848C_PHY_ID             0x20005ca0
>  #define TI_DP83620_PHY_ID              0x20005ce0
>  #define NS_DP83848C_PHY_ID             0x20005c90
> -#define TLK10X_PHY_ID                  0x2000a210
>
>  /* Registers */
>  #define DP83848_MICR                   0x11 /* MII Interrupt Control Register */
> @@ -78,7 +77,6 @@ static struct mdio_device_id __maybe_unused dp83848_tbl[] = {
>         { TI_DP83848C_PHY_ID, 0xfffffff0 },
>         { NS_DP83848C_PHY_ID, 0xfffffff0 },
>         { TI_DP83620_PHY_ID, 0xfffffff0 },
> -       { TLK10X_PHY_ID, 0xfffffff0 },
>         { }
>  };
>  MODULE_DEVICE_TABLE(mdio, dp83848_tbl);
> @@ -105,7 +103,6 @@ static struct phy_driver dp83848_driver[] = {
>         DP83848_PHY_DRIVER(TI_DP83848C_PHY_ID, "TI DP83848C 10/100 Mbps PHY"),
>         DP83848_PHY_DRIVER(NS_DP83848C_PHY_ID, "NS DP83848C 10/100 Mbps PHY"),
>         DP83848_PHY_DRIVER(TI_DP83620_PHY_ID, "TI DP83620 10/100 Mbps PHY"),
> -       DP83848_PHY_DRIVER(TLK10X_PHY_ID, "TI TLK10X 10/100 Mbps PHY"),
>  };
>  module_phy_driver(dp83848_driver);
>
> diff --git a/drivers/net/phy/tlk10x.c b/drivers/net/phy/tlk10x.c
> new file mode 100644
> index 0000000..1efc81e
> --- /dev/null
> +++ b/drivers/net/phy/tlk10x.c
> @@ -0,0 +1,209 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/**
> + * Driver for the Texas Instruments TLK105 / TLK106
> + *
> + * Copyright (C) 2018 NIBE Industrier AB - http://www.nibe.com
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.

Since you are using the SPDX id, please remove the license text (which
is actually wrong: it seems you have cut the v2+ version and then
removed the last sentence of the first paragraph? :-).

> + */
> +
> +#include <linux/module.h>
> +#include <linux/phy.h>
> +#include <linux/of.h>
> +
> +#define TLK10X_PHY_ID                  0x2000a210
> +
> +/* Registers */
> +#define TLK10X_MICR                    0x11 /* MII Interrupt Control Reg */
> +#define TLK10X_MISR                    0x12 /* MII Interrupt Status Reg */
> +#define TLK10X_REGCR                   0x0d /* Register Control Register */
> +#define TLK10X_ADDAR                   0x0e /* Data Register */
> +#define TLK10X_PWRBOCR                 0xae /* Power Backoff Register */
> +
> +/* MICR Register Fields */
> +#define TLK10X_MICR_INT_OE             BIT(0) /* Interrupt Output Enable */
> +#define TLK10X_MICR_INTEN              BIT(1) /* Interrupt Enable */
> +
> +/* MISR Register Fields */
> +#define TLK10X_MISR_RHF_INT_EN         BIT(0) /* Receive Error Counter */
> +#define TLK10X_MISR_FHF_INT_EN         BIT(1) /* False Carrier Counter */
> +#define TLK10X_MISR_ANC_INT_EN         BIT(2) /* Auto-negotiation complete */
> +#define TLK10X_MISR_DUP_INT_EN         BIT(3) /* Duplex Status */
> +#define TLK10X_MISR_SPD_INT_EN         BIT(4) /* Speed status */
> +#define TLK10X_MISR_LINK_INT_EN                BIT(5) /* Link status */
> +#define TLK10X_MISR_ED_INT_EN          BIT(6) /* Energy detect */
> +#define TLK10X_MISR_LQM_INT_EN         BIT(7) /* Link Quality Monitor */
> +
> +/* PWRBOCR Register Fields */
> +#define TLK10X_PWRBOCR_MASK            0xe0 /* Power Backoff Mask */
> +
> +#define TLK10X_INT_EN_MASK             \
> +       (TLK10X_MISR_ANC_INT_EN |       \
> +        TLK10X_MISR_DUP_INT_EN |       \
> +        TLK10X_MISR_SPD_INT_EN |       \
> +        TLK10X_MISR_LINK_INT_EN)
> +
> +struct tlk10x_private {
> +       int pwrbo_level;
> +};
> +
> +static int tlk10x_read(struct phy_device *phydev, int reg)
> +{
> +       if (reg & ~0x1f) {

0x1f or ~0x1f should probably have a #define name.

> +               /* Extended register */
> +               phy_write(phydev, TLK10X_REGCR, 0x001F);
> +               phy_write(phydev, TLK10X_ADDAR, reg);
> +               phy_write(phydev, TLK10X_REGCR, 0x401F);
> +               reg = TLK10X_ADDAR;
> +       }
> +
> +       return phy_read(phydev, reg);
> +}
> +
> +static int tlk10x_write(struct phy_device *phydev, int reg, int val)
> +{
> +       if (reg & ~0x1f) {

Ditto.

> +               /* Extended register */
> +               phy_write(phydev, TLK10X_REGCR, 0x001F);
> +               phy_write(phydev, TLK10X_ADDAR, reg);
> +               phy_write(phydev, TLK10X_REGCR, 0x401F);
> +               reg = TLK10X_ADDAR;
> +       }
> +
> +       return phy_write(phydev, reg, val);
> +}
> +
> +#ifdef CONFIG_OF_MDIO

Maybe you want the #ifdef inside.

> +static int tlk10x_of_init(struct phy_device *phydev)
> +{
> +       struct tlk10x_private *tlk10x = phydev->priv;
> +       struct device *dev = &phydev->mdio.dev;
> +       struct device_node *of_node = dev->of_node;
> +       int ret;
> +
> +       if (!of_node)
> +               return 0;
> +
> +       ret = of_property_read_u32(of_node, "ti,power-back-off",
> +                                  &tlk10x->pwrbo_level);
> +       if (ret) {
> +               dev_err(dev, "missing ti,power-back-off property");
> +               tlk10x->pwrbo_level = 0;
> +       }
> +
> +       return 0;
> +}
> +#else
> +static int tlk10x_of_init(struct phy_device *phydev)
> +{
> +       return 0;
> +}
> +#endif /* CONFIG_OF_MDIO */
> +
> +static int tlk10x_config_init(struct phy_device *phydev)
> +{
> +       int ret, reg;
> +       struct tlk10x_private *tlk10x;
> +
> +       ret = genphy_config_init(phydev);
> +       if (ret < 0)
> +               return ret;
> +
> +       if (!phydev->priv) {
> +               tlk10x = devm_kzalloc(&phydev->mdio.dev, sizeof(*tlk10x),
> +                                     GFP_KERNEL);
> +               if (!tlk10x)
> +                       return -ENOMEM;
> +
> +               phydev->priv = tlk10x;
> +               ret = tlk10x_of_init(phydev);
> +               if (ret)
> +                       return ret;
> +       } else {
> +               tlk10x = (struct tlk10x_private *)phydev->priv;
> +       }
> +
> +       // Power back off
> +       if (tlk10x->pwrbo_level < 0 || tlk10x->pwrbo_level > 3)
> +               tlk10x->pwrbo_level = 0;
> +       reg = tlk10x_read(phydev, TLK10X_PWRBOCR);
> +       reg = ((reg & ~TLK10X_PWRBOCR_MASK)
> +               | (tlk10x->pwrbo_level << 6));

Maybe the 6 should have a name, or maybe a bigger macro for this would clarify.

> +       ret = tlk10x_write(phydev, TLK10X_PWRBOCR, reg);
> +       if (ret < 0) {
> +               dev_err(&phydev->mdio.dev,
> +                       "unable to set power back-off (err=%d)\n", ret);
> +               return ret;
> +       }
> +       dev_info(&phydev->mdio.dev, "power back-off set to level %d\n",
> +                tlk10x->pwrbo_level);
> +
> +       return 0;
> +}
> +
> +static int tlk10x_ack_interrupt(struct phy_device *phydev)
> +{
> +       int err = tlk10x_read(phydev, TLK10X_MISR);

Following the style of the rest of the file, shouldn't this be:

    if (err < 0)
        return err;

    return 0;

?

> +
> +       return err < 0 ? err : 0;
> +}
> +
> +static int tlk10x_config_intr(struct phy_device *phydev)
> +{
> +       int control, ret;
> +
> +       control = tlk10x_read(phydev, TLK10X_MICR);
> +       if (control < 0)
> +               return control;
> +
> +       if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
> +               control |= TLK10X_MICR_INT_OE;
> +               control |= TLK10X_MICR_INTEN;
> +
> +               ret = tlk10x_write(phydev, TLK10X_MISR, TLK10X_INT_EN_MASK);
> +               if (ret < 0)
> +                       return ret;
> +       } else {
> +               control &= ~TLK10X_MICR_INTEN;
> +       }
> +
> +       return tlk10x_write(phydev, TLK10X_MICR, control);
> +}
> +
> +static struct phy_driver tlk10x_driver[] = {
> +       {
> +               .phy_id         = TLK10X_PHY_ID,
> +               .phy_id_mask    = 0xfffffff0,
> +               .name           = "TI TLK10X 10/100 Mbps PHY",
> +               .features       = PHY_BASIC_FEATURES,
> +               .flags          = PHY_HAS_INTERRUPT,
> +
> +               .config_init    = tlk10x_config_init,
> +               .soft_reset     = genphy_soft_reset,
> +
> +               /* IRQ related */
> +               .ack_interrupt  = tlk10x_ack_interrupt,
> +               .config_intr    = tlk10x_config_intr,
> +
> +               .suspend        = genphy_suspend,
> +               .resume         = genphy_resume,
> +       },
> +};
> +module_phy_driver(tlk10x_driver);
> +
> +static struct mdio_device_id __maybe_unused tlk10x_tbl[] = {
> +       { TLK10X_PHY_ID, 0xfffffff0 },
> +       { }
> +};
> +MODULE_DEVICE_TABLE(mdio, tlk10x_tbl);
> +
> +MODULE_DESCRIPTION("Texas Instruments TLK105 / TLK106 PHY driver");
> +MODULE_AUTHOR("Mans Andersson <mans.andersson@nibe.se>");
> +MODULE_LICENSE("GPL");

Should be "GPL v2".

Cheers,
Miguel

^ permalink raw reply

* Re: [RFC PATCH ghak32 V2 07/13] audit: add container aux record to watch/tree/mark
From: Richard Guy Briggs @ 2018-04-19 12:24 UTC (permalink / raw)
  To: Paul Moore
  Cc: cgroups, containers, linux-api, Linux-Audit Mailing List,
	linux-fsdevel, LKML, netdev, ebiederm, luto, jlayton, carlos,
	dhowells, viro, simo, Eric Paris, serge
In-Reply-To: <CAHC9VhTzp-r2TFytt1zTEpeGK=O5dEnLPFw-CdsM1ttpY0a30g@mail.gmail.com>

On 2018-04-18 20:42, Paul Moore wrote:
> On Fri, Mar 16, 2018 at 5:00 AM, Richard Guy Briggs <rgb@redhat.com> wrote:
> > Add container ID auxiliary record to mark, watch and tree rule
> > configuration standalone records.
> >
> > Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
> > ---
> >  kernel/audit_fsnotify.c |  5 ++++-
> >  kernel/audit_tree.c     |  5 ++++-
> >  kernel/audit_watch.c    | 33 +++++++++++++++++++--------------
> >  3 files changed, 27 insertions(+), 16 deletions(-)
> >
> > diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
> > index 52f368b..18c110d 100644
> > --- a/kernel/audit_fsnotify.c
> > +++ b/kernel/audit_fsnotify.c
> > @@ -124,10 +124,11 @@ static void audit_mark_log_rule_change(struct audit_fsnotify_mark *audit_mark, c
> >  {
> >         struct audit_buffer *ab;
> >         struct audit_krule *rule = audit_mark->rule;
> > +       struct audit_context *context = audit_alloc_local();
> >
> >         if (!audit_enabled)
> >                 return;
> 
> Move the audit_alloc_local() after the audit_enabled check.

Already fixed in V3 as previously warned, by making all
AUDIT_CONFIG_CHANGE records SYSCALL auxiliary records.

> > -       ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
> > +       ab = audit_log_start(context, GFP_NOFS, AUDIT_CONFIG_CHANGE);
> >         if (unlikely(!ab))
> >                 return;
> >         audit_log_format(ab, "auid=%u ses=%u op=%s",
> > @@ -138,6 +139,8 @@ static void audit_mark_log_rule_change(struct audit_fsnotify_mark *audit_mark, c
> >         audit_log_key(ab, rule->filterkey);
> >         audit_log_format(ab, " list=%d res=1", rule->listnr);
> >         audit_log_end(ab);
> > +       audit_log_container_info(context, "config", audit_get_containerid(current));
> > +       audit_free_context(context);
> >  }
> >
> >  void audit_remove_mark(struct audit_fsnotify_mark *audit_mark)
> > diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
> > index 67e6956..7c085be 100644
> > --- a/kernel/audit_tree.c
> > +++ b/kernel/audit_tree.c
> > @@ -496,8 +496,9 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
> >  static void audit_tree_log_remove_rule(struct audit_krule *rule)
> >  {
> >         struct audit_buffer *ab;
> > +       struct audit_context *context = audit_alloc_local();
> 
> Sort of independent of the audit container ID work, but shouldn't we
> have an audit_enabled check here?

Same.

> > -       ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
> > +       ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
> >         if (unlikely(!ab))
> >                 return;
> >         audit_log_format(ab, "op=remove_rule");
> > @@ -506,6 +507,8 @@ static void audit_tree_log_remove_rule(struct audit_krule *rule)
> >         audit_log_key(ab, rule->filterkey);
> >         audit_log_format(ab, " list=%d res=1", rule->listnr);
> >         audit_log_end(ab);
> > +       audit_log_container_info(context, "config", audit_get_containerid(current));
> > +       audit_free_context(context);
> >  }
> >
> >  static void kill_rules(struct audit_tree *tree)
> > diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
> > index 9eb8b35..60d75a2 100644
> > --- a/kernel/audit_watch.c
> > +++ b/kernel/audit_watch.c
> > @@ -238,20 +238,25 @@ static struct audit_watch *audit_dupe_watch(struct audit_watch *old)
> >
> >  static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watch *w, char *op)
> >  {
> > -       if (audit_enabled) {
> > -               struct audit_buffer *ab;
> > -               ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
> > -               if (unlikely(!ab))
> > -                       return;
> > -               audit_log_format(ab, "auid=%u ses=%u op=%s",
> > -                                from_kuid(&init_user_ns, audit_get_loginuid(current)),
> > -                                audit_get_sessionid(current), op);
> > -               audit_log_format(ab, " path=");
> > -               audit_log_untrustedstring(ab, w->path);
> > -               audit_log_key(ab, r->filterkey);
> > -               audit_log_format(ab, " list=%d res=1", r->listnr);
> > -               audit_log_end(ab);
> > -       }
> > +       struct audit_buffer *ab;
> > +       struct audit_context *context = audit_alloc_local();
> > +
> > +       if (!audit_enabled)
> > +               return;
> 
> Same as above, do the allocation after the audit_enabled check.

Same.

> > +       ab = audit_log_start(context, GFP_NOFS, AUDIT_CONFIG_CHANGE);
> > +       if (unlikely(!ab))
> > +               return;
> > +       audit_log_format(ab, "auid=%u ses=%u op=%s",
> > +                        from_kuid(&init_user_ns, audit_get_loginuid(current)),
> > +                        audit_get_sessionid(current), op);
> > +       audit_log_format(ab, " path=");
> > +       audit_log_untrustedstring(ab, w->path);
> > +       audit_log_key(ab, r->filterkey);
> > +       audit_log_format(ab, " list=%d res=1", r->listnr);
> > +       audit_log_end(ab);
> > +       audit_log_container_info(context, "config", audit_get_containerid(current));
> > +       audit_free_context(context);
> >  }
> 
> -- 
> paul moore
> www.paul-moore.com

- RGB

--
Richard Guy Briggs <rgb@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635

^ permalink raw reply

* Re: [PATCH v2 net 1/3] virtio_net: split out ctrl buffer
From: Jason Wang @ 2018-04-19 12:26 UTC (permalink / raw)
  To: Michael S. Tsirkin, linux-kernel
  Cc: Thomas Huth, Eric Dumazet, netdev, Cornelia Huck, virtualization,
	Mikulas Patocka, David Miller
In-Reply-To: <1524115776-334953-2-git-send-email-mst@redhat.com>



On 2018年04月19日 13:30, Michael S. Tsirkin wrote:
> When sending control commands, virtio net sets up several buffers for
> DMA. The buffers are all part of the net device which means it's
> actually allocated by kvmalloc so it's in theory (on extreme memory
> pressure) possible to get a vmalloc'ed buffer which on some platforms
> means we can't DMA there.
>
> Fix up by moving the DMA buffers into a separate structure.
>
> Reported-by: Mikulas Patocka <mpatocka@redhat.com>
> Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>
> Changes from v1:
> 	build fix
>
>   drivers/net/virtio_net.c | 68 +++++++++++++++++++++++++++---------------------
>   1 file changed, 39 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 7b187ec..3d0eff53 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -147,6 +147,17 @@ struct receive_queue {
>   	struct xdp_rxq_info xdp_rxq;
>   };
>   
> +/* Control VQ buffers: protected by the rtnl lock */
> +struct control_buf {
> +	struct virtio_net_ctrl_hdr hdr;
> +	virtio_net_ctrl_ack status;
> +	struct virtio_net_ctrl_mq mq;
> +	u8 promisc;
> +	u8 allmulti;
> +	u16 vid;
> +	u64 offloads;
> +};
> +
>   struct virtnet_info {
>   	struct virtio_device *vdev;
>   	struct virtqueue *cvq;
> @@ -192,14 +203,7 @@ struct virtnet_info {
>   	struct hlist_node node;
>   	struct hlist_node node_dead;
>   
> -	/* Control VQ buffers: protected by the rtnl lock */
> -	struct virtio_net_ctrl_hdr ctrl_hdr;
> -	virtio_net_ctrl_ack ctrl_status;
> -	struct virtio_net_ctrl_mq ctrl_mq;
> -	u8 ctrl_promisc;
> -	u8 ctrl_allmulti;
> -	u16 ctrl_vid;
> -	u64 ctrl_offloads;
> +	struct control_buf *ctrl;
>   
>   	/* Ethtool settings */
>   	u8 duplex;
> @@ -1454,25 +1458,25 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
>   	/* Caller should know better */
>   	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
>   
> -	vi->ctrl_status = ~0;
> -	vi->ctrl_hdr.class = class;
> -	vi->ctrl_hdr.cmd = cmd;
> +	vi->ctrl->status = ~0;
> +	vi->ctrl->hdr.class = class;
> +	vi->ctrl->hdr.cmd = cmd;
>   	/* Add header */
> -	sg_init_one(&hdr, &vi->ctrl_hdr, sizeof(vi->ctrl_hdr));
> +	sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr));
>   	sgs[out_num++] = &hdr;
>   
>   	if (out)
>   		sgs[out_num++] = out;
>   
>   	/* Add return status. */
> -	sg_init_one(&stat, &vi->ctrl_status, sizeof(vi->ctrl_status));
> +	sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status));
>   	sgs[out_num] = &stat;
>   
>   	BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
>   	virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
>   
>   	if (unlikely(!virtqueue_kick(vi->cvq)))
> -		return vi->ctrl_status == VIRTIO_NET_OK;
> +		return vi->ctrl->status == VIRTIO_NET_OK;
>   
>   	/* Spin for a response, the kick causes an ioport write, trapping
>   	 * into the hypervisor, so the request should be handled immediately.
> @@ -1481,7 +1485,7 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
>   	       !virtqueue_is_broken(vi->cvq))
>   		cpu_relax();
>   
> -	return vi->ctrl_status == VIRTIO_NET_OK;
> +	return vi->ctrl->status == VIRTIO_NET_OK;
>   }
>   
>   static int virtnet_set_mac_address(struct net_device *dev, void *p)
> @@ -1593,8 +1597,8 @@ static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
>   	if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
>   		return 0;
>   
> -	vi->ctrl_mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
> -	sg_init_one(&sg, &vi->ctrl_mq, sizeof(vi->ctrl_mq));
> +	vi->ctrl->mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
> +	sg_init_one(&sg, &vi->ctrl->mq, sizeof(vi->ctrl->mq));
>   
>   	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
>   				  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
> @@ -1653,22 +1657,22 @@ static void virtnet_set_rx_mode(struct net_device *dev)
>   	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
>   		return;
>   
> -	vi->ctrl_promisc = ((dev->flags & IFF_PROMISC) != 0);
> -	vi->ctrl_allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
> +	vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0);
> +	vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
>   
> -	sg_init_one(sg, &vi->ctrl_promisc, sizeof(vi->ctrl_promisc));
> +	sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc));
>   
>   	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
>   				  VIRTIO_NET_CTRL_RX_PROMISC, sg))
>   		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
> -			 vi->ctrl_promisc ? "en" : "dis");
> +			 vi->ctrl->promisc ? "en" : "dis");
>   
> -	sg_init_one(sg, &vi->ctrl_allmulti, sizeof(vi->ctrl_allmulti));
> +	sg_init_one(sg, &vi->ctrl->allmulti, sizeof(vi->ctrl->allmulti));
>   
>   	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
>   				  VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
>   		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
> -			 vi->ctrl_allmulti ? "en" : "dis");
> +			 vi->ctrl->allmulti ? "en" : "dis");
>   
>   	uc_count = netdev_uc_count(dev);
>   	mc_count = netdev_mc_count(dev);
> @@ -1714,8 +1718,8 @@ static int virtnet_vlan_rx_add_vid(struct net_device *dev,
>   	struct virtnet_info *vi = netdev_priv(dev);
>   	struct scatterlist sg;
>   
> -	vi->ctrl_vid = vid;
> -	sg_init_one(&sg, &vi->ctrl_vid, sizeof(vi->ctrl_vid));
> +	vi->ctrl->vid = vid;
> +	sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid));
>   
>   	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
>   				  VIRTIO_NET_CTRL_VLAN_ADD, &sg))
> @@ -1729,8 +1733,8 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
>   	struct virtnet_info *vi = netdev_priv(dev);
>   	struct scatterlist sg;
>   
> -	vi->ctrl_vid = vid;
> -	sg_init_one(&sg, &vi->ctrl_vid, sizeof(vi->ctrl_vid));
> +	vi->ctrl->vid = vid;
> +	sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid));
>   
>   	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
>   				  VIRTIO_NET_CTRL_VLAN_DEL, &sg))
> @@ -2126,9 +2130,9 @@ static int virtnet_restore_up(struct virtio_device *vdev)
>   static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
>   {
>   	struct scatterlist sg;
> -	vi->ctrl_offloads = cpu_to_virtio64(vi->vdev, offloads);
> +	vi->ctrl->offloads = cpu_to_virtio64(vi->vdev, offloads);
>   
> -	sg_init_one(&sg, &vi->ctrl_offloads, sizeof(vi->ctrl_offloads));
> +	sg_init_one(&sg, &vi->ctrl->offloads, sizeof(vi->ctrl->offloads));
>   
>   	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
>   				  VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
> @@ -2351,6 +2355,7 @@ static void virtnet_free_queues(struct virtnet_info *vi)
>   
>   	kfree(vi->rq);
>   	kfree(vi->sq);
> +	kfree(vi->ctrl);
>   }
>   
>   static void _free_receive_bufs(struct virtnet_info *vi)
> @@ -2543,6 +2548,9 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
>   {
>   	int i;
>   
> +	vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
> +	if (!vi->ctrl)
> +		goto err_ctrl;
>   	vi->sq = kzalloc(sizeof(*vi->sq) * vi->max_queue_pairs, GFP_KERNEL);
>   	if (!vi->sq)
>   		goto err_sq;
> @@ -2571,6 +2579,8 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
>   err_rq:
>   	kfree(vi->sq);
>   err_sq:
> +	kfree(vi->ctrl);
> +err_ctrl:
>   	return -ENOMEM;
>   }
>   

Acked-by: Jason Wang <jasowang@redhat.com>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* Re: [PATCH v2 net 2/3] virtio_net: fix adding vids on big-endian
From: Jason Wang @ 2018-04-19 12:26 UTC (permalink / raw)
  To: Michael S. Tsirkin, linux-kernel
  Cc: Mikulas Patocka, Eric Dumazet, David Miller, Thomas Huth,
	Cornelia Huck, virtualization, netdev
In-Reply-To: <1524115776-334953-3-git-send-email-mst@redhat.com>



On 2018年04月19日 13:30, Michael S. Tsirkin wrote:
> Programming vids (adding or removing them) still passes
> guest-endian values in the DMA buffer. That's wrong
> if guest is big-endian and when virtio 1 is enabled.
>
> Note: this is on top of a previous patch:
> 	virtio_net: split out ctrl buffer
>
> Fixes: 9465a7a6f ("virtio_net: enable v1.0 support")
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>   drivers/net/virtio_net.c | 6 +++---
>   1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 3d0eff53..f84fe04 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -154,7 +154,7 @@ struct control_buf {
>   	struct virtio_net_ctrl_mq mq;
>   	u8 promisc;
>   	u8 allmulti;
> -	u16 vid;
> +	__virtio16 vid;
>   	u64 offloads;
>   };
>   
> @@ -1718,7 +1718,7 @@ static int virtnet_vlan_rx_add_vid(struct net_device *dev,
>   	struct virtnet_info *vi = netdev_priv(dev);
>   	struct scatterlist sg;
>   
> -	vi->ctrl->vid = vid;
> +	vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid);
>   	sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid));
>   
>   	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
> @@ -1733,7 +1733,7 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
>   	struct virtnet_info *vi = netdev_priv(dev);
>   	struct scatterlist sg;
>   
> -	vi->ctrl->vid = vid;
> +	vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid);
>   	sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid));
>   
>   	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,

Acked-by: Jason Wang <jasowang@redhat.com>

^ permalink raw reply

* Re: [PATCH bpf-next v3 5/8] bpf: add documentation for eBPF helpers (33-41)
From: Daniel Borkmann @ 2018-04-19 12:27 UTC (permalink / raw)
  To: Quentin Monnet, ast; +Cc: netdev, oss-drivers, linux-doc, linux-man
In-Reply-To: <20180417143438.7018-6-quentin.monnet@netronome.com>

On 04/17/2018 04:34 PM, Quentin Monnet wrote:
> Add documentation for eBPF helper functions to bpf.h user header file.
> This documentation can be parsed with the Python script provided in
> another commit of the patch series, in order to provide a RST document
> that can later be converted into a man page.
> 
> The objective is to make the documentation easily understandable and
> accessible to all eBPF developers, including beginners.
> 
> This patch contains descriptions for the following helper functions, all
> written by Daniel:
> 
> - bpf_get_hash_recalc()
> - bpf_skb_change_tail()
> - bpf_skb_pull_data()
> - bpf_csum_update()
> - bpf_set_hash_invalid()
> - bpf_get_numa_node_id()
> - bpf_set_hash()
> - bpf_skb_adjust_room()
> - bpf_xdp_adjust_meta()
> 
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
> ---
>  include/uapi/linux/bpf.h | 155 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 155 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index d748f65a8f58..3a40f5debac2 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -965,9 +965,164 @@ union bpf_attr {
>   * 	Return
>   * 		0 on success, or a negative error in case of failure.
>   *
> + * u32 bpf_get_hash_recalc(struct sk_buff *skb)
> + * 	Description
> + * 		Retrieve the hash of the packet, *skb*\ **->hash**. If it is
> + * 		not set, in particular if the hash was cleared due to mangling,
> + * 		recompute this hash. Later accesses to the hash can be done
> + * 		directly with *skb*\ **->hash**.
> + *
> + * 		Calling **bpf_set_hash_invalid**\ (), changing a packet
> + * 		prototype with **bpf_skb_change_proto**\ (), or calling
> + * 		**bpf_skb_store_bytes**\ () with the
> + * 		**BPF_F_INVALIDATE_HASH** are actions susceptible to clear
> + * 		the hash and to trigger a new computation for the next call to
> + * 		**bpf_get_hash_recalc**\ ().
> + * 	Return
> + * 		The 32-bit hash.
> + *
>   * u64 bpf_get_current_task(void)
>   * 	Return
>   * 		A pointer to the current task struct.
> + *
> + * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
> + * 	Description
> + * 		Resize (trim or grow) the packet associated to *skb* to the
> + * 		new *len*. The *flags* are reserved for future usage, and must
> + * 		be left at zero.
> + *
> + * 		The basic idea is that the helper performs the needed work to
> + * 		change the size of the packet, then the eBPF program rewrites
> + * 		the rest via helpers like **bpf_skb_store_bytes**\ (),
> + * 		**bpf_l3_csum_replace**\ (), **bpf_l4_csum_replace**\ ()
> + * 		and others. This helper is a slow path utility intended for
> + * 		replies with control messages. And because it is targeted for
> + * 		slow path, the helper itself can afford to be slow: it
> + * 		implicitly linearizes, unclones and drops offloads from the
> + * 		*skb*.
> + *
> + * 		A call to this helper is susceptible to change data from the
> + * 		packet. Therefore, at load time, all checks on pointers
> + * 		previously done by the verifier are invalidated and must be
> + * 		performed again.
> + * 	Return
> + * 		0 on success, or a negative error in case of failure.
> + *
> + * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
> + * 	Description
> + * 		Pull in non-linear data in case the *skb* is non-linear and not
> + * 		all of *len* are part of the linear section. Make *len* bytes
> + * 		from *skb* readable and writable. If a zero value is passed for
> + * 		*len*, then the whole length of the *skb* is pulled.
> + *
> + * 		This helper is only needed for reading and writing with direct
> + * 		packet access.
> + *
> + * 		For direct packet access, when testing that offsets to access
> + * 		are within packet boundaries (test on *skb*\ **->data_end**)
> + * 		fails, programs just bail out, or, in the direct read case, use

I would add here why it can fail, meaning either due to invalid offsets
or due to the requested data being in non-linear parts of the skb, in which
case either bpf_skb_load_bytes() can be used as you mentioned, or the data
can be pulled in via bpf_skb_pull_data().

> + * 		**bpf_skb_load_bytes()** as an alternative to overcome this
> + * 		limitation. If such data sits in non-linear parts, it is
> + * 		possible to pull them in once with the new helper, retest and
> + * 		eventually access them.

You do this here, but slightly rearranging this paragraph to explain why
one would use either of the helpers would help the reading flow a bit.

> + * 		At the same time, this also makes sure the skb is uncloned,
> + * 		which is a necessary condition for direct write. As this needs
> + * 		to be an invariant for the write part only, the verifier
> + * 		detects writes and adds a prologue that is calling
> + * 		**bpf_skb_pull_data()** to effectively unclone the skb from the
> + * 		very beginning in case it is indeed cloned.
> + *
> + * 		A call to this helper is susceptible to change data from the
> + * 		packet. Therefore, at load time, all checks on pointers
> + * 		previously done by the verifier are invalidated and must be
> + * 		performed again.
> + * 	Return
> + * 		0 on success, or a negative error in case of failure.
> + *
> + * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum)
> + * 	Description
> + * 		Add the checksum *csum* into *skb*\ **->csum** in case the
> + * 		driver fed us an IP checksum. Return an error otherwise. This

It's not IP checksum specifically (if that is what you meant), it's when the
driver propagates CHECKSUM_COMPLETE to the skb, where the device has supplied
the checksum of the whole packet into skb->csum. At TC ingress time, this
covers everything starting from net header offset to the end of the skb since
mac hdr skb->csum has been pulled already. The main use case is indeed direct
packet access.

> + * 		helper is intended to be used in combination with
> + * 		**bpf_csum_diff()** helper, in particular when the checksum
> + * 		needs to be updated after data has been written into the packet
> + * 		through direct packet access.
> + * 	Return
> + * 		The checksum on success, or a negative error code in case of
> + * 		failure.
> + *
> + * void bpf_set_hash_invalid(struct sk_buff *skb)
> + * 	Description
> + * 		Invalidate the current *skb*\ **->hash**. It can be used after
> + * 		mangling on headers through direct packet access, in order to
> + * 		indicate that the hash is outdated and to trigger a
> + * 		recalculation the next time the kernel tries to access this
> + * 		hash.

[...] hash or through the helper bpf_get_hash_recalc().

> + *
> + * int bpf_get_numa_node_id(void)
> + * 	Description
> + * 		Return the id of the current NUMA node. The primary use case
> + * 		for this helper is the selection of sockets for the local NUMA
> + * 		node, when the program is attached to sockets using the
> + * 		**SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**).

I would mention that this is also available for other program types, similarly
to the bpf_get_smp_processor_id() helper, though. (Otherwise one might read
this as not being the case.)

> + * 	Return
> + * 		The id of current NUMA node.
> + *
> + * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
> + * 	Description
> + * 		Set the full hash for *skb* (set the field *skb*\ **->hash**)
> + * 		to value *hash*.
> + * 	Return
> + * 		0
> + *
> + * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags)
> + * 	Description
> + * 		Grow or shrink the room for data in the packet associated to
> + * 		*skb* by *len_diff*, and according to the selected *mode*.
> + *
> + * 		There is a single supported mode at this time:
> + *
> + * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
> + * 		  (room space is added or removed below the layer 3 header).
> + *
> + * 		All values for *flags* are reserved for future usage, and must
> + * 		be left at zero.
> + *
> + * 		A call to this helper is susceptible to change data from the
> + * 		packet. Therefore, at load time, all checks on pointers
> + * 		previously done by the verifier are invalidated and must be
> + * 		performed again.
> + * 	Return
> + * 		0 on success, or a negative error in case of failure.
> + *
> + * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
> + * 	Description
> + * 		Adjust the address pointed by *xdp_md*\ **->data_meta** by
> + * 		*delta* (which can be positive or negative). Note that this
> + * 		operation modifies the address stored in *xdp_md*\ **->data**,
> + * 		so the latter must be loaded only after the helper has been
> + * 		called.
> + *
> + * 		The use of *xdp_md*\ **->data_meta** is optional and programs
> + * 		are not required to use it. The rationale is that when the
> + * 		packet is processed with XDP (e.g. as DoS filter), it is
> + * 		possible to push further meta data along with it before passing
> + * 		to the stack, and to give the guarantee that an ingress eBPF
> + * 		program attached as a TC classifier on the same device can pick
> + * 		this up for further post-processing. Since TC works with socket
> + * 		buffers, it remains possible to set from XDP the **mark** or
> + * 		**priority** pointers, or other pointers for the socket buffer.
> + * 		Having this scratch space generic and programmable allows for
> + * 		more flexibility as the user is free to store whatever meta
> + * 		data they need.
> + *
> + * 		A call to this helper is susceptible to change data from the
> + * 		packet. Therefore, at load time, all checks on pointers
> + * 		previously done by the verifier are invalidated and must be
> + * 		performed again.
> + * 	Return
> + * 		0 on success, or a negative error in case of failure.
>   */
>  #define __BPF_FUNC_MAPPER(FN)		\
>  	FN(unspec),			\
> 

^ permalink raw reply

* Re: [PATCH v2 net 3/3] virtio_net: sparse annotation fix
From: Jason Wang @ 2018-04-19 12:27 UTC (permalink / raw)
  To: Michael S. Tsirkin, linux-kernel
  Cc: Mikulas Patocka, Eric Dumazet, David Miller, Thomas Huth,
	Cornelia Huck, virtualization, netdev
In-Reply-To: <1524115776-334953-4-git-send-email-mst@redhat.com>



On 2018年04月19日 13:30, Michael S. Tsirkin wrote:
> offloads is a buffer in virtio format, should use
> the __virtio64 tag.
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>   drivers/net/virtio_net.c | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index f84fe04..c5b11f2 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -155,7 +155,7 @@ struct control_buf {
>   	u8 promisc;
>   	u8 allmulti;
>   	__virtio16 vid;
> -	u64 offloads;
> +	__virtio64 offloads;
>   };
>   
>   struct virtnet_info {

Acked-by: Jason Wang <jasowang@redhat.com>

^ permalink raw reply

* Re: [RFC PATCH ghak32 V2 09/13] audit: add containerid support for config/feature/user records
From: Richard Guy Briggs @ 2018-04-19 12:31 UTC (permalink / raw)
  To: Paul Moore
  Cc: cgroups, containers, linux-api, Linux-Audit Mailing List,
	linux-fsdevel, LKML, netdev, ebiederm, luto, jlayton, carlos,
	dhowells, viro, simo, Eric Paris, serge
In-Reply-To: <CAHC9VhQ-i5oA48sXXnN2fP06t5=9-NMoY0bKcGXorQw2k=CK0Q@mail.gmail.com>

On 2018-04-18 21:27, Paul Moore wrote:
> On Fri, Mar 16, 2018 at 5:00 AM, Richard Guy Briggs <rgb@redhat.com> wrote:
> > Add container ID auxiliary records to configuration change, feature set change
> > and user generated standalone records.
> >
> > Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
> > ---
> >  kernel/audit.c       | 50 ++++++++++++++++++++++++++++++++++++++++----------
> >  kernel/auditfilter.c |  5 ++++-
> >  2 files changed, 44 insertions(+), 11 deletions(-)
> >
> > diff --git a/kernel/audit.c b/kernel/audit.c
> > index b238be5..08662b4 100644
> > --- a/kernel/audit.c
> > +++ b/kernel/audit.c
> > @@ -400,8 +400,9 @@ static int audit_log_config_change(char *function_name, u32 new, u32 old,
> >  {
> >         struct audit_buffer *ab;
> >         int rc = 0;
> > +       struct audit_context *context = audit_alloc_local();
> 
> We should be able to use current->audit_context here right?  If we
> can't for every caller, perhaps we pass an audit_context as an
> argument and only allocate a local context when the passed
> audit_context is NULL.
> 
> Also, if you're not comfortable always using current, just pass the
> audit_context as you do with audit_log_common_recv_msg().

As mentioned in the tree/watch/mark patch, this is all obsoleted by
making the AUDIT_CONFIG_CHANGE record a SYSCALL auxiliary record.
This review would have been more helpful a month and a half ago.

> > -       ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
> > +       ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
> >         if (unlikely(!ab))
> >                 return rc;
> >         audit_log_format(ab, "%s=%u old=%u", function_name, new, old);
> > @@ -411,6 +412,8 @@ static int audit_log_config_change(char *function_name, u32 new, u32 old,
> >                 allow_changes = 0; /* Something weird, deny request */
> >         audit_log_format(ab, " res=%d", allow_changes);
> >         audit_log_end(ab);
> > +       audit_log_container_info(context, "config", audit_get_containerid(current));
> > +       audit_free_context(context);
> >         return rc;
> >  }
> >
> > @@ -1058,7 +1061,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
> >         return err;
> >  }
> >
> > -static void audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
> > +static void audit_log_common_recv_msg(struct audit_context *context,
> > +                                     struct audit_buffer **ab, u16 msg_type)
> >  {
> >         uid_t uid = from_kuid(&init_user_ns, current_uid());
> >         pid_t pid = task_tgid_nr(current);
> > @@ -1068,7 +1072,7 @@ static void audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
> >                 return;
> >         }
> >
> > -       *ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
> > +       *ab = audit_log_start(context, GFP_KERNEL, msg_type);
> >         if (unlikely(!*ab))
> >                 return;
> >         audit_log_format(*ab, "pid=%d uid=%u", pid, uid);
> > @@ -1097,11 +1101,12 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature
> >                                      u32 old_lock, u32 new_lock, int res)
> >  {
> >         struct audit_buffer *ab;
> > +       struct audit_context *context = audit_alloc_local();
> 
> So I know based on the other patch we are currently discussing that we
> can use current here ...
> 
> >         if (audit_enabled == AUDIT_OFF)
> >                 return;
> >
> > -       ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
> > +       ab = audit_log_start(context, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
> >         if (!ab)
> >                 return;
> >         audit_log_task_info(ab, current);
> > @@ -1109,6 +1114,8 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature
> >                          audit_feature_names[which], !!old_feature, !!new_feature,
> >                          !!old_lock, !!new_lock, res);
> >         audit_log_end(ab);
> > +       audit_log_container_info(context, "feature", audit_get_containerid(current));
> > +       audit_free_context(context);
> >  }
> >
> >  static int audit_set_feature(struct sk_buff *skb)
> > @@ -1337,13 +1344,15 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
> >
> >                 err = audit_filter(msg_type, AUDIT_FILTER_USER);
> >                 if (err == 1) { /* match or error */
> > +                       struct audit_context *context = audit_alloc_local();
> 
> I'm pretty sure we can use current here.
> 
> >                         err = 0;
> >                         if (msg_type == AUDIT_USER_TTY) {
> >                                 err = tty_audit_push();
> >                                 if (err)
> >                                         break;
> >                         }
> > -                       audit_log_common_recv_msg(&ab, msg_type);
> > +                       audit_log_common_recv_msg(context, &ab, msg_type);
> >                         if (msg_type != AUDIT_USER_TTY)
> >                                 audit_log_format(ab, " msg='%.*s'",
> >                                                  AUDIT_MESSAGE_TEXT_MAX,
> > @@ -1359,6 +1368,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
> >                                 audit_log_n_untrustedstring(ab, data, size);
> >                         }
> >                         audit_log_end(ab);
> > +                       audit_log_container_info(context, "user",
> > +                                                audit_get_containerid(current));
> > +                       audit_free_context(context);
> >                 }
> >                 break;
> >         case AUDIT_ADD_RULE:
> > @@ -1366,9 +1378,14 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
> >                 if (nlmsg_len(nlh) < sizeof(struct audit_rule_data))
> >                         return -EINVAL;
> >                 if (audit_enabled == AUDIT_LOCKED) {
> > -                       audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
> > +                       struct audit_context *context = audit_alloc_local();
> 
> Pretty sure current can be used here too.  In fact I think everywhere
> where we are processing commands from netlink we can use current as I
> believe the entire netlink stack is processed in the context of the
> caller.
> 
> > +                       audit_log_common_recv_msg(context, &ab, AUDIT_CONFIG_CHANGE);
> >                         audit_log_format(ab, " audit_enabled=%d res=0", audit_enabled);
> >                         audit_log_end(ab);
> > +                       audit_log_container_info(context, "config",
> > +                                                audit_get_containerid(current));
> > +                       audit_free_context(context);
> >                         return -EPERM;
> >                 }
> >                 err = audit_rule_change(msg_type, seq, data, nlmsg_len(nlh));
> > @@ -1376,17 +1393,23 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
> >         case AUDIT_LIST_RULES:
> >                 err = audit_list_rules_send(skb, seq);
> >                 break;
> > -       case AUDIT_TRIM:
> > +       case AUDIT_TRIM: {
> > +               struct audit_context *context = audit_alloc_local();
> 
> Same.
> 
> >                 audit_trim_trees();
> > -               audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
> > +               audit_log_common_recv_msg(context, &ab, AUDIT_CONFIG_CHANGE);
> >                 audit_log_format(ab, " op=trim res=1");
> >                 audit_log_end(ab);
> > +               audit_log_container_info(context, "config",
> > +                                        audit_get_containerid(current));
> > +               audit_free_context(context);
> >                 break;
> > +       }
> >         case AUDIT_MAKE_EQUIV: {
> >                 void *bufp = data;
> >                 u32 sizes[2];
> >                 size_t msglen = nlmsg_len(nlh);
> >                 char *old, *new;
> > +               struct audit_context *context = audit_alloc_local();
> 
> Same.
> 
> >                 err = -EINVAL;
> >                 if (msglen < 2 * sizeof(u32))
> > @@ -1408,7 +1431,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
> >                 /* OK, here comes... */
> >                 err = audit_tag_tree(old, new);
> >
> > -               audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
> > +               audit_log_common_recv_msg(context, &ab, AUDIT_CONFIG_CHANGE);
> >
> >                 audit_log_format(ab, " op=make_equiv old=");
> >                 audit_log_untrustedstring(ab, old);
> > @@ -1418,6 +1441,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
> >                 audit_log_end(ab);
> >                 kfree(old);
> >                 kfree(new);
> > +               audit_log_container_info(context, "config",
> > +                                        audit_get_containerid(current));
> > +               audit_free_context(context);
> >                 break;
> >         }
> >         case AUDIT_SIGNAL_INFO:
> > @@ -1459,6 +1485,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
> >                 struct audit_tty_status s, old;
> >                 struct audit_buffer     *ab;
> >                 unsigned int t;
> > +               struct audit_context *context = audit_alloc_local();
> 
> Same.
> 
> >                 memset(&s, 0, sizeof(s));
> >                 /* guard against past and future API changes */
> > @@ -1477,12 +1504,15 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
> >                 old.enabled = t & AUDIT_TTY_ENABLE;
> >                 old.log_passwd = !!(t & AUDIT_TTY_LOG_PASSWD);
> >
> > -               audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
> > +               audit_log_common_recv_msg(context, &ab, AUDIT_CONFIG_CHANGE);
> >                 audit_log_format(ab, " op=tty_set old-enabled=%d new-enabled=%d"
> >                                  " old-log_passwd=%d new-log_passwd=%d res=%d",
> >                                  old.enabled, s.enabled, old.log_passwd,
> >                                  s.log_passwd, !err);
> >                 audit_log_end(ab);
> > +               audit_log_container_info(context, "config",
> > +                                        audit_get_containerid(current));
> > +               audit_free_context(context);
> >                 break;
> >         }
> >         default:
> > diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
> > index c4c8746..5f7f4d6 100644
> > --- a/kernel/auditfilter.c
> > +++ b/kernel/auditfilter.c
> > @@ -1109,11 +1109,12 @@ static void audit_log_rule_change(char *action, struct audit_krule *rule, int re
> >         struct audit_buffer *ab;
> >         uid_t loginuid = from_kuid(&init_user_ns, audit_get_loginuid(current));
> >         unsigned int sessionid = audit_get_sessionid(current);
> > +       struct audit_context *context = audit_alloc_local();
> >
> >         if (!audit_enabled)
> >                 return;
> 
> Well, first I think we should be able to get rid of the local context,
> but if for some reason we can't use current->audit_context then do the
> allocation after the audit_enabled check.
> 
> > -       ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
> > +       ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
> >         if (!ab)
> >                 return;
> >         audit_log_format(ab, "auid=%u ses=%u" ,loginuid, sessionid);
> > @@ -1122,6 +1123,8 @@ static void audit_log_rule_change(char *action, struct audit_krule *rule, int re
> >         audit_log_key(ab, rule->filterkey);
> >         audit_log_format(ab, " list=%d res=%d", rule->listnr, res);
> >         audit_log_end(ab);
> > +       audit_log_container_info(context, "config", audit_get_containerid(current));
> > +       audit_free_context(context);
> >  }
> 
> -- 
> paul moore
> www.paul-moore.com

- RGB

--
Richard Guy Briggs <rgb@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635

^ permalink raw reply

* [PATCH net-next 0/4] geneve: verify user specified MTU or adjust with a lower device
From: Alexey Kodanev @ 2018-04-19 12:42 UTC (permalink / raw)
  To: netdev; +Cc: David Miller, Alexey Kodanev

The first two patches don't introduce any functional changes and
contain minor cleanups for code readability.

The last one adds a new function, geneve_link_config(), similar to those
in other tunnel drivers. The function is used on new link creation or
when the 'remote' parameter is changed. It applies a user-specified MTU
or, if it finds a lower device, tunes the tunnel MTU using it.

Alexey Kodanev (4):
  geneve: remove white-space before '#if IS_ENABLED(CONFIG_IPV6)'
  geneve: cleanup hard coded value for Ethernet header length
  geneve: check MTU for a minimum in geneve_change_mtu()
  geneve: configure MTU based on a lower device

 drivers/net/geneve.c | 72 ++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 61 insertions(+), 11 deletions(-)

-- 
1.8.3.1

^ permalink raw reply

* [PATCH net-next 1/4] geneve: remove white-space before '#if IS_ENABLED(CONFIG_IPV6)'
From: Alexey Kodanev @ 2018-04-19 12:42 UTC (permalink / raw)
  To: netdev; +Cc: David Miller, Alexey Kodanev
In-Reply-To: <1524141752-25789-1-git-send-email-alexey.kodanev@oracle.com>

Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
---
 drivers/net/geneve.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index b919e89..45acdc9 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1261,7 +1261,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
 	}
 
 	if (data[IFLA_GENEVE_REMOTE6]) {
- #if IS_ENABLED(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 		if (changelink && (ip_tunnel_info_af(info) == AF_INET)) {
 			attrtype = IFLA_GENEVE_REMOTE6;
 			goto change_notsup;
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next 2/4] geneve: cleanup hard coded value for Ethernet header length
From: Alexey Kodanev @ 2018-04-19 12:42 UTC (permalink / raw)
  To: netdev; +Cc: David Miller, Alexey Kodanev
In-Reply-To: <1524141752-25789-1-git-send-email-alexey.kodanev@oracle.com>

Use ETH_HLEN instead of the hard-coded 14, and introduce two new macros,
GENEVE_IPV4_HLEN and GENEVE_IPV6_HLEN, that include the Ethernet header
length, the corresponding IP header length and GENEVE_BASE_HLEN.

Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
---
 drivers/net/geneve.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 45acdc9..b650f84 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -36,6 +36,8 @@
 
 #define GENEVE_VER 0
 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
+#define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN)
+#define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN)
 
 /* per-network namespace private data for this module */
 struct geneve_net {
@@ -826,8 +828,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 		return PTR_ERR(rt);
 
 	if (skb_dst(skb)) {
-		int mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr) -
-			  GENEVE_BASE_HLEN - info->options_len - 14;
+		int mtu = dst_mtu(&rt->dst) - GENEVE_IPV4_HLEN -
+			  info->options_len;
 
 		skb_dst_update_pmtu(skb, mtu);
 	}
@@ -872,8 +874,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 		return PTR_ERR(dst);
 
 	if (skb_dst(skb)) {
-		int mtu = dst_mtu(dst) - sizeof(struct ipv6hdr) -
-			  GENEVE_BASE_HLEN - info->options_len - 14;
+		int mtu = dst_mtu(dst) - GENEVE_IPV6_HLEN - info->options_len;
 
 		skb_dst_update_pmtu(skb, mtu);
 	}
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next 3/4] geneve: check MTU for a minimum in geneve_change_mtu()
From: Alexey Kodanev @ 2018-04-19 12:42 UTC (permalink / raw)
  To: netdev; +Cc: David Miller, Alexey Kodanev
In-Reply-To: <1524141752-25789-1-git-send-email-alexey.kodanev@oracle.com>

geneve_change_mtu() will be used not only as ndo_change_mtu() callback,
but also to verify a user specified MTU on a new link creation in the
next patch.

Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
---
 drivers/net/geneve.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index b650f84..ae649f6 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -942,11 +942,10 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
 
 static int geneve_change_mtu(struct net_device *dev, int new_mtu)
 {
-	/* Only possible if called internally, ndo_change_mtu path's new_mtu
-	 * is guaranteed to be between dev->min_mtu and dev->max_mtu.
-	 */
 	if (new_mtu > dev->max_mtu)
 		new_mtu = dev->max_mtu;
+	else if (new_mtu < dev->min_mtu)
+		new_mtu = dev->min_mtu;
 
 	dev->mtu = new_mtu;
 	return 0;
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next 4/4] geneve: configure MTU based on a lower device
From: Alexey Kodanev @ 2018-04-19 12:42 UTC (permalink / raw)
  To: netdev; +Cc: David Miller, Alexey Kodanev
In-Reply-To: <1524141752-25789-1-git-send-email-alexey.kodanev@oracle.com>

Currently, on new link creation or when the 'remote' address parameter
is updated, the MTU is not changed and always equals 1500. When the lower
device has a larger MTU, this can be inefficient, e.g. for UDP, and
requires manual MTU adjustment to match the MTU of the lower
device.

This patch tries to automate this process, finds a lower device using
the 'remote' address parameter, then uses its MTU to tune GENEVE's MTU:
  * on a new link creation
  * when 'remote' parameter is changed

Also with this patch, a user-specified MTU on new link creation is
passed to geneve_change_mtu(), where it is verified, and the MTU
adjustment based on the lower device is skipped in that case. Prior to
this change, it was possible to set invalid MTU values on new link creation.

Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
---
 drivers/net/geneve.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 53 insertions(+), 3 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index ae649f6..750eaa5 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1387,6 +1387,48 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
 	return -EOPNOTSUPP;
 }
 
+static void geneve_link_config(struct net_device *dev,
+			       struct ip_tunnel_info *info, struct nlattr *tb[])
+{
+	struct geneve_dev *geneve = netdev_priv(dev);
+	int ldev_mtu = 0;
+
+	if (tb[IFLA_MTU]) {
+		geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+		return;
+	}
+
+	switch (ip_tunnel_info_af(info)) {
+	case AF_INET: {
+		struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst };
+		struct rtable *rt = ip_route_output_key(geneve->net, &fl4);
+
+		if (!IS_ERR(rt) && rt->dst.dev) {
+			ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN;
+			ip_rt_put(rt);
+		}
+		break;
+	}
+#if IS_ENABLED(CONFIG_IPV6)
+	case AF_INET6: {
+		struct rt6_info *rt = rt6_lookup(geneve->net,
+						 &info->key.u.ipv6.dst, NULL, 0,
+						 NULL, 0);
+
+		if (rt && rt->dst.dev)
+			ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN;
+		ip6_rt_put(rt);
+		break;
+	}
+#endif
+	}
+
+	if (ldev_mtu <= 0)
+		return;
+
+	geneve_change_mtu(dev, ldev_mtu - info->options_len);
+}
+
 static int geneve_newlink(struct net *net, struct net_device *dev,
 			  struct nlattr *tb[], struct nlattr *data[],
 			  struct netlink_ext_ack *extack)
@@ -1402,8 +1444,14 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
 	if (err)
 		return err;
 
-	return geneve_configure(net, dev, extack, &info, metadata,
-				use_udp6_rx_checksums);
+	err = geneve_configure(net, dev, extack, &info, metadata,
+			       use_udp6_rx_checksums);
+	if (err)
+		return err;
+
+	geneve_link_config(dev, &info, tb);
+
+	return 0;
 }
 
 /* Quiesces the geneve device data path for both TX and RX.
@@ -1477,8 +1525,10 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
 	if (err)
 		return err;
 
-	if (!geneve_dst_addr_equal(&geneve->info, &info))
+	if (!geneve_dst_addr_equal(&geneve->info, &info)) {
 		dst_cache_reset(&info.dst_cache);
+		geneve_link_config(dev, &info, tb);
+	}
 
 	geneve_quiesce(geneve, &gs4, &gs6);
 	geneve->info = info;
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH bpf-next v3 6/8] bpf: add documentation for eBPF helpers (42-50)
From: Daniel Borkmann @ 2018-04-19 12:40 UTC (permalink / raw)
  To: Quentin Monnet, ast
  Cc: netdev, oss-drivers, linux-doc, linux-man, Kaixu Xia,
	Martin KaFai Lau, Sargun Dhillon, Thomas Graf, Gianluca Borello,
	Chenbo Feng
In-Reply-To: <20180417143438.7018-7-quentin.monnet@netronome.com>

On 04/17/2018 04:34 PM, Quentin Monnet wrote:
> Add documentation for eBPF helper functions to bpf.h user header file.
> This documentation can be parsed with the Python script provided in
> another commit of the patch series, in order to provide a RST document
> that can later be converted into a man page.
> 
> The objective is to make the documentation easily understandable and
> accessible to all eBPF developers, including beginners.
> 
> This patch contains descriptions for the following helper functions:
> 
> Helper from Kaixu:
> - bpf_perf_event_read()
> 
> Helpers from Martin:
> - bpf_skb_under_cgroup()
> - bpf_xdp_adjust_head()
> 
> Helpers from Sargun:
> - bpf_probe_write_user()
> - bpf_current_task_under_cgroup()
> 
> Helper from Thomas:
> - bpf_skb_change_head()
> 
> Helper from Gianluca:
> - bpf_probe_read_str()
> 
> Helpers from Chenbo:
> - bpf_get_socket_cookie()
> - bpf_get_socket_uid()
> 
> v3:
> - bpf_perf_event_read(): Fix time of selection for perf event type in
>   description. Remove occurrences of "cores" to avoid confusion with
>   "CPU".
> 
> Cc: Kaixu Xia <xiakaixu@huawei.com>
> Cc: Martin KaFai Lau <kafai@fb.com>
> Cc: Sargun Dhillon <sargun@sargun.me>
> Cc: Thomas Graf <tgraf@suug.ch>
> Cc: Gianluca Borello <g.borello@gmail.com>
> Cc: Chenbo Feng <fengc@google.com>
> Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
> ---
>  include/uapi/linux/bpf.h | 158 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 158 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 3a40f5debac2..dd79a1c82adf 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -753,6 +753,25 @@ union bpf_attr {
>   * 	Return
>   * 		0 on success, or a negative error in case of failure.
>   *
> + * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags)
> + * 	Description
> + * 		Read the value of a perf event counter. This helper relies on a
> + * 		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
> + * 		the perf event counter is selected when *map* is updated with
> + * 		perf event file descriptors. The *map* is an array whose size
> + * 		is the number of available CPUs, and each cell contains a value
> + * 		relative to one CPU. The value to retrieve is indicated by
> + * 		*flags*, that contains the index of the CPU to look up, masked
> + * 		with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
> + * 		**BPF_F_CURRENT_CPU** to indicate that the value for the
> + * 		current CPU should be retrieved.
> + *
> + * 		Note that before Linux 4.13, only hardware perf event can be
> + * 		retrieved.
> + * 	Return
> + * 		The value of the perf event counter read from the map, or a
> + * 		negative error code in case of failure.
> + *
>   * int bpf_redirect(u32 ifindex, u64 flags)
>   * 	Description
>   * 		Redirect the packet to another net device of index *ifindex*.
> @@ -965,6 +984,17 @@ union bpf_attr {
>   * 	Return
>   * 		0 on success, or a negative error in case of failure.
>   *
> + * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
> + * 	Description
> + * 		Check whether *skb* is a descendant of the cgroup2 held by
> + * 		*map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
> + * 	Return
> + * 		The return value depends on the result of the test, and can be:
> + *
> + * 		* 0, if the *skb* failed the cgroup2 descendant test.
> + * 		* 1, if the *skb* succeeded the cgroup2 descendant test.
> + * 		* A negative error code, if an error occurred.
> + *
>   * u32 bpf_get_hash_recalc(struct sk_buff *skb)
>   * 	Description
>   * 		Retrieve the hash of the packet, *skb*\ **->hash**. If it is
> @@ -985,6 +1015,37 @@ union bpf_attr {
>   * 	Return
>   * 		A pointer to the current task struct.
>   *
> + * int bpf_probe_write_user(void *dst, const void *src, u32 len)
> + * 	Description
> + * 		Attempt in a safe way to write *len* bytes from the buffer
> + * 		*src* to *dst* in memory. It only works for threads that are in
> + * 		user context.

Plus the dst address must be a valid user space address.

> + * 		This helper should not be used to implement any kind of
> + * 		security mechanism because of TOC-TOU attacks, but rather to
> + * 		debug, divert, and manipulate execution of semi-cooperative
> + * 		processes.
> + *
> + * 		Keep in mind that this feature is meant for experiments, and it
> + * 		has a risk of crashing the system and running programs.

Ditto, crashing user space applications.

> + * 		Therefore, when an eBPF program using this helper is attached,
> + * 		a warning including PID and process name is printed to kernel
> + * 		logs.
> + * 	Return
> + * 		0 on success, or a negative error in case of failure.
> + *
> + * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
> + * 	Description
> + * 		Check whether the probe is being run is the context of a given
> + * 		subset of the cgroup2 hierarchy. The cgroup2 to test is held by
> + * 		*map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
> + * 	Return
> + * 		The return value depends on the result of the test, and can be:
> + *
> + * 		* 0, if the *skb* task belongs to the cgroup2.
> + * 		* 1, if the *skb* task does not belong to the cgroup2.
> + * 		* A negative error code, if an error occurred.
> + *
>   * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
>   * 	Description
>   * 		Resize (trim or grow) the packet associated to *skb* to the
> @@ -1069,6 +1130,103 @@ union bpf_attr {
>   * 	Return
>   * 		The id of current NUMA node.
>   *
> + * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
> + * 	Description
> + * 		Grows headroom of packet associated to *skb* and adjusts the
> + * 		offset of the MAC header accordingly, adding *len* bytes of
> + * 		space. It automatically extends and reallocates memory as
> + * 		required.
> + *
> + * 		This helper can be used on a layer 3 *skb* to push a MAC header
> + * 		for redirection into a layer 2 device.
> + *
> + * 		All values for *flags* are reserved for future usage, and must
> + * 		be left at zero.
> + *
> + * 		A call to this helper is susceptible to change data from the
> + * 		packet. Therefore, at load time, all checks on pointers
> + * 		previously done by the verifier are invalidated and must be
> + * 		performed again.
> + * 	Return
> + * 		0 on success, or a negative error in case of failure.
> + *
> + * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
> + * 	Description
> + * 		Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
> + * 		it is possible to use a negative value for *delta*. This helper
> + * 		can be used to prepare the packet for pushing or popping
> + * 		headers.
> + *
> + * 		A call to this helper is susceptible to change data from the
> + * 		packet. Therefore, at load time, all checks on pointers
> + * 		previously done by the verifier are invalidated and must be
> + * 		performed again.
> + * 	Return
> + * 		0 on success, or a negative error in case of failure.
> + *
> + * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
> + * 	Description
> + * 		Copy a NUL terminated string from an unsafe address
> + * 		*unsafe_ptr* to *dst*. The *size* should include the
> + * 		terminating NUL byte. In case the string length is smaller than
> + * 		*size*, the target is not padded with further NUL bytes. If the
> + * 		string length is larger than *size*, just *size*-1 bytes are
> + * 		copied and the last byte is set to NUL.
> + *
> + * 		On success, the length of the copied string is returned. This
> + * 		makes this helper useful in tracing programs for reading
> + * 		strings, and more importantly to get its length at runtime. See
> + * 		the following snippet:
> + *
> + * 		::
> + *
> + * 			SEC("kprobe/sys_open")
> + * 			void bpf_sys_open(struct pt_regs *ctx)
> + * 			{
> + * 			        char buf[PATHLEN]; // PATHLEN is defined to 256
> + * 			        int res = bpf_probe_read_str(buf, sizeof(buf),
> + * 				                             ctx->di);
> + *
> + * 				// Consume buf, for example push it to
> + * 				// userspace via bpf_perf_event_output(); we
> + * 				// can use res (the string length) as event
> + * 				// size, after checking its boundaries.
> + * 			}
> + *
> + * 		In comparison, using **bpf_probe_read()** helper here instead
> + * 		to read the string would require to estimate the length at
> + * 		compile time, and would often result in copying more memory
> + * 		than necessary.
> + *
> + * 		Another useful use case is when parsing individual process
> + * 		arguments or individual environment variables navigating
> + * 		*current*\ **->mm->arg_start** and *current*\
> + * 		**->mm->env_start**: using this helper and the return value,
> + * 		one can quickly iterate at the right offset of the memory area.
> + * 	Return
> + * 		On success, the strictly positive length of the string,
> + * 		including the trailing NUL character. On error, a negative
> + * 		value.
> + *
> + * u64 bpf_get_socket_cookie(struct sk_buff *skb)
> + * 	Description
> + * 		Retrieve the socket cookie generated by the kernel from a
> + * 		**struct sk_buff** with a known socket. If none has been set
> + * 		yet, generate a new cookie. This helper can be useful for
> + * 		monitoring per socket networking traffic statistics as it
> + * 		provides a unique socket identifier per namespace.
> + * 	Return
> + * 		A 8-byte long non-decreasing number on success, or 0 if the
> + * 		socket field is missing inside *skb*.
> + *
> + * u32 bpf_get_socket_uid(struct sk_buff *skb)
> + * 	Return
> + * 		The owner UID of the socket associated to *skb*. If the socket
> + * 		is **NULL**, or if it is not a full socket (i.e. if it is a
> + * 		time-wait or a request socket instead), **overflowuid** value
> + * 		is returned (note that **overflowuid** might also be the actual
> + * 		UID value for the socket).
> + *
>   * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
>   * 	Description
>   * 		Set the full hash for *skb* (set the field *skb*\ **->hash**)
> 

^ permalink raw reply

* simplify procfs code for seq_file instances
From: Christoph Hellwig @ 2018-04-19 12:41 UTC (permalink / raw)
  To: Andrew Morton, Alexander Viro
  Cc: linux-rtc, Alessandro Zummo, Alexandre Belloni, devel,
	linux-kernel, linux-scsi, Corey Minyard, linux-ide,
	Greg Kroah-Hartman, jfs-discussion, linux-afs, linux-acpi, netdev,
	netfilter-devel, Jiri Slaby, linux-ext4, Alexey Dobriyan,
	megaraidlinux.pdl, drbd-dev

We currently have hundreds of proc files that implement plain, read-only
seq_file based interfaces.  This series consolidates them using new
procfs helpers that take the seq_operations or simple show callback
directly.

A git tree is available at:

    git://git.infradead.org/users/hch/misc.git proc_create

Gitweb:

    http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/proc_create

Diffstat:  268 files changed, 1193 insertions(+), 6194 deletions(-)

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot

^ permalink raw reply

* [PATCH 01/39] net/can: single_open_net needs to be paired with single_release_net
From: Christoph Hellwig @ 2018-04-19 12:41 UTC (permalink / raw)
  To: Andrew Morton, Alexander Viro
  Cc: linux-rtc, Alessandro Zummo, Alexandre Belloni, devel,
	linux-kernel, linux-scsi, Corey Minyard, linux-ide,
	Greg Kroah-Hartman, jfs-discussion, linux-afs, linux-acpi, netdev,
	netfilter-devel, Jiri Slaby, linux-ext4, Alexey Dobriyan,
	megaraidlinux.pdl, drbd-dev
In-Reply-To: <20180419124140.9309-1-hch@lst.de>

Otherwise we will leak a reference to the network namespace.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 net/can/bcm.c  | 2 +-
 net/can/proc.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/can/bcm.c b/net/can/bcm.c
index ac5e5e34fee3..8073fa14e143 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -249,7 +249,7 @@ static const struct file_operations bcm_proc_fops = {
 	.open		= bcm_proc_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= single_release,
+	.release	= single_release_net,
 };
 #endif /* CONFIG_PROC_FS */
 
diff --git a/net/can/proc.c b/net/can/proc.c
index fdf704e9bb8c..fde2fd55b826 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -279,7 +279,7 @@ static const struct file_operations can_stats_proc_fops = {
 	.open		= can_stats_proc_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= single_release,
+	.release	= single_release_net,
 };
 
 static int can_reset_stats_proc_show(struct seq_file *m, void *v)
@@ -449,7 +449,7 @@ static const struct file_operations can_rcvlist_sff_proc_fops = {
 	.open		= can_rcvlist_sff_proc_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= single_release,
+	.release	= single_release_net,
 };
 
 
@@ -492,7 +492,7 @@ static const struct file_operations can_rcvlist_eff_proc_fops = {
 	.open		= can_rcvlist_eff_proc_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= single_release,
+	.release	= single_release_net,
 };
 
 /*
-- 
2.17.0


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot

^ permalink raw reply related

* [PATCH 05/39] ipv{4, 6}/udp{, lite}: simplify proc registration
From: Christoph Hellwig @ 2018-04-19 12:41 UTC (permalink / raw)
  To: Andrew Morton, Alexander Viro
  Cc: linux-rtc, Alessandro Zummo, Alexandre Belloni, devel,
	linux-kernel, linux-scsi, Corey Minyard, linux-ide,
	Greg Kroah-Hartman, jfs-discussion, linux-afs, linux-acpi, netdev,
	netfilter-devel, Jiri Slaby, linux-ext4, Alexey Dobriyan,
	megaraidlinux.pdl, drbd-dev
In-Reply-To: <20180419124140.9309-1-hch@lst.de>

Remove a couple of indirections to make the code look like most other
protocols.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/net/udp.h  | 20 ++++------
 net/ipv4/udp.c     | 99 +++++++++++++++++-----------------------------
 net/ipv4/udplite.c | 21 +++-------
 net/ipv6/udp.c     | 30 +++++++++-----
 net/ipv6/udplite.c | 21 +++-------
 5 files changed, 78 insertions(+), 113 deletions(-)

diff --git a/include/net/udp.h b/include/net/udp.h
index 0676b272f6ac..093cd323f66a 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -408,31 +408,27 @@ do {									\
 #define __UDPX_INC_STATS(sk, field) __UDP_INC_STATS(sock_net(sk), field, 0)
 #endif
 
-/* /proc */
-int udp_seq_open(struct inode *inode, struct file *file);
-
+#ifdef CONFIG_PROC_FS
 struct udp_seq_afinfo {
-	char				*name;
 	sa_family_t			family;
 	struct udp_table		*udp_table;
-	const struct file_operations	*seq_fops;
-	struct seq_operations		seq_ops;
 };
 
 struct udp_iter_state {
 	struct seq_net_private  p;
-	sa_family_t		family;
 	int			bucket;
-	struct udp_table	*udp_table;
 };
 
-#ifdef CONFIG_PROC_FS
-int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo);
-void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo);
+void *udp_seq_start(struct seq_file *seq, loff_t *pos);
+void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+void udp_seq_stop(struct seq_file *seq, void *v);
+
+extern const struct file_operations udp_afinfo_seq_fops;
+extern const struct file_operations udp6_afinfo_seq_fops;
 
 int udp4_proc_init(void);
 void udp4_proc_exit(void);
-#endif
+#endif /* CONFIG_PROC_FS */
 
 int udpv4_offload_init(void);
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 24b5c59b1c53..d48790ddb6cf 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2579,12 +2579,13 @@ EXPORT_SYMBOL(udp_prot);
 static struct sock *udp_get_first(struct seq_file *seq, int start)
 {
 	struct sock *sk;
+	struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
 	struct udp_iter_state *state = seq->private;
 	struct net *net = seq_file_net(seq);
 
-	for (state->bucket = start; state->bucket <= state->udp_table->mask;
+	for (state->bucket = start; state->bucket <= afinfo->udp_table->mask;
 	     ++state->bucket) {
-		struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
+		struct udp_hslot *hslot = &afinfo->udp_table->hash[state->bucket];
 
 		if (hlist_empty(&hslot->head))
 			continue;
@@ -2593,7 +2594,7 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
 		sk_for_each(sk, &hslot->head) {
 			if (!net_eq(sock_net(sk), net))
 				continue;
-			if (sk->sk_family == state->family)
+			if (sk->sk_family == afinfo->family)
 				goto found;
 		}
 		spin_unlock_bh(&hslot->lock);
@@ -2605,16 +2606,17 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
 
 static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
 {
+	struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
 	struct udp_iter_state *state = seq->private;
 	struct net *net = seq_file_net(seq);
 
 	do {
 		sk = sk_next(sk);
-	} while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
+	} while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != afinfo->family));
 
 	if (!sk) {
-		if (state->bucket <= state->udp_table->mask)
-			spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
+		if (state->bucket <= afinfo->udp_table->mask)
+			spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
 		return udp_get_first(seq, state->bucket + 1);
 	}
 	return sk;
@@ -2630,15 +2632,16 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
 	return pos ? NULL : sk;
 }
 
-static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
+void *udp_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct udp_iter_state *state = seq->private;
 	state->bucket = MAX_UDP_PORTS;
 
 	return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
 }
+EXPORT_SYMBOL(udp_seq_start);
 
-static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct sock *sk;
 
@@ -2650,56 +2653,17 @@ static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	++*pos;
 	return sk;
 }
+EXPORT_SYMBOL(udp_seq_next);
 
-static void udp_seq_stop(struct seq_file *seq, void *v)
+void udp_seq_stop(struct seq_file *seq, void *v)
 {
+	struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
 	struct udp_iter_state *state = seq->private;
 
-	if (state->bucket <= state->udp_table->mask)
-		spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
+	if (state->bucket <= afinfo->udp_table->mask)
+		spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
 }
-
-int udp_seq_open(struct inode *inode, struct file *file)
-{
-	struct udp_seq_afinfo *afinfo = PDE_DATA(inode);
-	struct udp_iter_state *s;
-	int err;
-
-	err = seq_open_net(inode, file, &afinfo->seq_ops,
-			   sizeof(struct udp_iter_state));
-	if (err < 0)
-		return err;
-
-	s = ((struct seq_file *)file->private_data)->private;
-	s->family		= afinfo->family;
-	s->udp_table		= afinfo->udp_table;
-	return err;
-}
-EXPORT_SYMBOL(udp_seq_open);
-
-/* ------------------------------------------------------------------------ */
-int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
-{
-	struct proc_dir_entry *p;
-	int rc = 0;
-
-	afinfo->seq_ops.start		= udp_seq_start;
-	afinfo->seq_ops.next		= udp_seq_next;
-	afinfo->seq_ops.stop		= udp_seq_stop;
-
-	p = proc_create_data(afinfo->name, 0444, net->proc_net,
-			     afinfo->seq_fops, afinfo);
-	if (!p)
-		rc = -ENOMEM;
-	return rc;
-}
-EXPORT_SYMBOL(udp_proc_register);
-
-void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo)
-{
-	remove_proc_entry(afinfo->name, net->proc_net);
-}
-EXPORT_SYMBOL(udp_proc_unregister);
+EXPORT_SYMBOL(udp_seq_stop);
 
 /* ------------------------------------------------------------------------ */
 static void udp4_format_sock(struct sock *sp, struct seq_file *f,
@@ -2739,32 +2703,43 @@ int udp4_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static const struct file_operations udp_afinfo_seq_fops = {
+static const struct seq_operations udp_seq_ops = {
+	.start		= udp_seq_start,
+	.next		= udp_seq_next,
+	.stop		= udp_seq_stop,
+	.show		= udp4_seq_show,
+};
+
+static int udp_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &udp_seq_ops,
+			sizeof(struct udp_iter_state));
+}
+
+const struct file_operations udp_afinfo_seq_fops = {
 	.open     = udp_seq_open,
 	.read     = seq_read,
 	.llseek   = seq_lseek,
 	.release  = seq_release_net
 };
+EXPORT_SYMBOL(udp_afinfo_seq_fops);
 
-/* ------------------------------------------------------------------------ */
 static struct udp_seq_afinfo udp4_seq_afinfo = {
-	.name		= "udp",
 	.family		= AF_INET,
 	.udp_table	= &udp_table,
-	.seq_fops	= &udp_afinfo_seq_fops,
-	.seq_ops	= {
-		.show		= udp4_seq_show,
-	},
 };
 
 static int __net_init udp4_proc_init_net(struct net *net)
 {
-	return udp_proc_register(net, &udp4_seq_afinfo);
+	if (!proc_create_data("udp", 0444, net->proc_net, &udp_afinfo_seq_fops,
+			&udp4_seq_afinfo))
+		return -ENOMEM;
+	return 0;
 }
 
 static void __net_exit udp4_proc_exit_net(struct net *net)
 {
-	udp_proc_unregister(net, &udp4_seq_afinfo);
+	remove_proc_entry("udp", net->proc_net);
 }
 
 static struct pernet_operations udp4_net_ops = {
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index f96614e9b9a5..4a6e67bfbe0d 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -14,6 +14,7 @@
 #define pr_fmt(fmt) "UDPLite: " fmt
 
 #include <linux/export.h>
+#include <linux/proc_fs.h>
 #include "udp_impl.h"
 
 struct udp_table 	udplite_table __read_mostly;
@@ -73,32 +74,22 @@ static struct inet_protosw udplite4_protosw = {
 };
 
 #ifdef CONFIG_PROC_FS
-
-static const struct file_operations udplite_afinfo_seq_fops = {
-	.open     = udp_seq_open,
-	.read     = seq_read,
-	.llseek   = seq_lseek,
-	.release  = seq_release_net
-};
-
 static struct udp_seq_afinfo udplite4_seq_afinfo = {
-	.name		= "udplite",
 	.family		= AF_INET,
 	.udp_table 	= &udplite_table,
-	.seq_fops	= &udplite_afinfo_seq_fops,
-	.seq_ops	= {
-		.show		= udp4_seq_show,
-	},
 };
 
 static int __net_init udplite4_proc_init_net(struct net *net)
 {
-	return udp_proc_register(net, &udplite4_seq_afinfo);
+	if (!proc_create_data("udplite", 0444, net->proc_net,
+			&udp_afinfo_seq_fops, &udplite4_seq_afinfo))
+		return -ENOMEM;
+	return 0;
 }
 
 static void __net_exit udplite4_proc_exit_net(struct net *net)
 {
-	udp_proc_unregister(net, &udplite4_seq_afinfo);
+	remove_proc_entry("udplite", net->proc_net);
 }
 
 static struct pernet_operations udplite4_net_ops = {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4ec76a87aeb8..68f11d6e3912 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1480,31 +1480,43 @@ int udp6_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static const struct file_operations udp6_afinfo_seq_fops = {
-	.open     = udp_seq_open,
+static const struct seq_operations udp6_seq_ops = {
+	.start		= udp_seq_start,
+	.next		= udp_seq_next,
+	.stop		= udp_seq_stop,
+	.show		= udp6_seq_show,
+};
+
+static int udp6_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &udp6_seq_ops,
+			sizeof(struct udp_iter_state));
+}
+
+const struct file_operations udp6_afinfo_seq_fops = {
+	.open     = udp6_seq_open,
 	.read     = seq_read,
 	.llseek   = seq_lseek,
 	.release  = seq_release_net
 };
+EXPORT_SYMBOL(udp6_afinfo_seq_fops);
 
 static struct udp_seq_afinfo udp6_seq_afinfo = {
-	.name		= "udp6",
 	.family		= AF_INET6,
 	.udp_table	= &udp_table,
-	.seq_fops	= &udp6_afinfo_seq_fops,
-	.seq_ops	= {
-		.show		= udp6_seq_show,
-	},
 };
 
 int __net_init udp6_proc_init(struct net *net)
 {
-	return udp_proc_register(net, &udp6_seq_afinfo);
+	if (!proc_create_data("udp6", 0444, net->proc_net,
+			&udp6_afinfo_seq_fops, &udp6_seq_afinfo))
+		return -ENOMEM;
+	return 0;
 }
 
 void udp6_proc_exit(struct net *net)
 {
-	udp_proc_unregister(net, &udp6_seq_afinfo);
+	remove_proc_entry("udp6", net->proc_net);
 }
 #endif /* CONFIG_PROC_FS */
 
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 14ae32bb1f3d..a119e57196b5 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -12,6 +12,7 @@
  *		2 of the License, or (at your option) any later version.
  */
 #include <linux/export.h>
+#include <linux/proc_fs.h>
 #include "udp_impl.h"
 
 static int udplitev6_rcv(struct sk_buff *skb)
@@ -92,32 +93,22 @@ void udplitev6_exit(void)
 }
 
 #ifdef CONFIG_PROC_FS
-
-static const struct file_operations udplite6_afinfo_seq_fops = {
-	.open     = udp_seq_open,
-	.read     = seq_read,
-	.llseek   = seq_lseek,
-	.release  = seq_release_net
-};
-
 static struct udp_seq_afinfo udplite6_seq_afinfo = {
-	.name		= "udplite6",
 	.family		= AF_INET6,
 	.udp_table	= &udplite_table,
-	.seq_fops	= &udplite6_afinfo_seq_fops,
-	.seq_ops	= {
-		.show		= udp6_seq_show,
-	},
 };
 
 static int __net_init udplite6_proc_init_net(struct net *net)
 {
-	return udp_proc_register(net, &udplite6_seq_afinfo);
+	if (!proc_create_data("udplite6", 0444, net->proc_net,
+			&udp6_afinfo_seq_fops, &udplite6_seq_afinfo))
+		return -ENOMEM;
+	return 0;
 }
 
 static void __net_exit udplite6_proc_exit_net(struct net *net)
 {
-	udp_proc_unregister(net, &udplite6_seq_afinfo);
+	remove_proc_entry("udplite6", net->proc_net);
 }
 
 static struct pernet_operations udplite6_net_ops = {
-- 
2.17.0


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot

^ permalink raw reply related

* [PATCH 06/39] ipv{4, 6}/tcp: simplify procfs registration
From: Christoph Hellwig @ 2018-04-19 12:41 UTC (permalink / raw)
  To: Andrew Morton, Alexander Viro
  Cc: linux-rtc, Alessandro Zummo, Alexandre Belloni, devel,
	linux-kernel, linux-scsi, Corey Minyard, linux-ide,
	Greg Kroah-Hartman, jfs-discussion, linux-afs, linux-acpi, netdev,
	netfilter-devel, Jiri Slaby, linux-ext4, Alexey Dobriyan,
	megaraidlinux.pdl, drbd-dev
In-Reply-To: <20180419124140.9309-1-hch@lst.de>

Avoid most of the afinfo indirections and just call the proc helpers
directly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/net/tcp.h   | 11 ++----
 net/ipv4/tcp_ipv4.c | 85 +++++++++++++++++----------------------------
 net/ipv6/tcp_ipv6.c | 27 +++++++++-----
 3 files changed, 53 insertions(+), 70 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9c9b3768b350..51dc7a26a2fa 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1747,27 +1747,22 @@ enum tcp_seq_states {
 	TCP_SEQ_STATE_ESTABLISHED,
 };
 
-int tcp_seq_open(struct inode *inode, struct file *file);
+void *tcp_seq_start(struct seq_file *seq, loff_t *pos);
+void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+void tcp_seq_stop(struct seq_file *seq, void *v);
 
 struct tcp_seq_afinfo {
-	char				*name;
 	sa_family_t			family;
-	const struct file_operations	*seq_fops;
-	struct seq_operations		seq_ops;
 };
 
 struct tcp_iter_state {
 	struct seq_net_private	p;
-	sa_family_t		family;
 	enum tcp_seq_states	state;
 	struct sock		*syn_wait_sk;
 	int			bucket, offset, sbucket, num;
 	loff_t			last_pos;
 };
 
-int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo);
-void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo);
-
 extern struct request_sock_ops tcp_request_sock_ops;
 extern struct request_sock_ops tcp6_request_sock_ops;
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f70586b50838..645f259d0972 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1961,6 +1961,7 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
  */
 static void *listening_get_next(struct seq_file *seq, void *cur)
 {
+	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
 	struct tcp_iter_state *st = seq->private;
 	struct net *net = seq_file_net(seq);
 	struct inet_listen_hashbucket *ilb;
@@ -1983,7 +1984,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 	sk_for_each_from(sk) {
 		if (!net_eq(sock_net(sk), net))
 			continue;
-		if (sk->sk_family == st->family)
+		if (sk->sk_family == afinfo->family)
 			return sk;
 	}
 	spin_unlock(&ilb->lock);
@@ -2020,6 +2021,7 @@ static inline bool empty_bucket(const struct tcp_iter_state *st)
  */
 static void *established_get_first(struct seq_file *seq)
 {
+	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
 	struct tcp_iter_state *st = seq->private;
 	struct net *net = seq_file_net(seq);
 	void *rc = NULL;
@@ -2036,7 +2038,7 @@ static void *established_get_first(struct seq_file *seq)
 
 		spin_lock_bh(lock);
 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
-			if (sk->sk_family != st->family ||
+			if (sk->sk_family != afinfo->family ||
 			    !net_eq(sock_net(sk), net)) {
 				continue;
 			}
@@ -2051,6 +2053,7 @@ static void *established_get_first(struct seq_file *seq)
 
 static void *established_get_next(struct seq_file *seq, void *cur)
 {
+	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
 	struct sock *sk = cur;
 	struct hlist_nulls_node *node;
 	struct tcp_iter_state *st = seq->private;
@@ -2062,7 +2065,8 @@ static void *established_get_next(struct seq_file *seq, void *cur)
 	sk = sk_nulls_next(sk);
 
 	sk_nulls_for_each_from(sk, node) {
-		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
+		if (sk->sk_family == afinfo->family &&
+		    net_eq(sock_net(sk), net))
 			return sk;
 	}
 
@@ -2135,7 +2139,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
 	return rc;
 }
 
-static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
+void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct tcp_iter_state *st = seq->private;
 	void *rc;
@@ -2156,8 +2160,9 @@ static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
 	st->last_pos = *pos;
 	return rc;
 }
+EXPORT_SYMBOL(tcp_seq_start);
 
-static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct tcp_iter_state *st = seq->private;
 	void *rc = NULL;
@@ -2186,8 +2191,9 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	st->last_pos = *pos;
 	return rc;
 }
+EXPORT_SYMBOL(tcp_seq_next);
 
-static void tcp_seq_stop(struct seq_file *seq, void *v)
+void tcp_seq_stop(struct seq_file *seq, void *v)
 {
 	struct tcp_iter_state *st = seq->private;
 
@@ -2202,47 +2208,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
 		break;
 	}
 }
-
-int tcp_seq_open(struct inode *inode, struct file *file)
-{
-	struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
-	struct tcp_iter_state *s;
-	int err;
-
-	err = seq_open_net(inode, file, &afinfo->seq_ops,
-			  sizeof(struct tcp_iter_state));
-	if (err < 0)
-		return err;
-
-	s = ((struct seq_file *)file->private_data)->private;
-	s->family		= afinfo->family;
-	s->last_pos		= 0;
-	return 0;
-}
-EXPORT_SYMBOL(tcp_seq_open);
-
-int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
-{
-	int rc = 0;
-	struct proc_dir_entry *p;
-
-	afinfo->seq_ops.start		= tcp_seq_start;
-	afinfo->seq_ops.next		= tcp_seq_next;
-	afinfo->seq_ops.stop		= tcp_seq_stop;
-
-	p = proc_create_data(afinfo->name, 0444, net->proc_net,
-			     afinfo->seq_fops, afinfo);
-	if (!p)
-		rc = -ENOMEM;
-	return rc;
-}
-EXPORT_SYMBOL(tcp_proc_register);
-
-void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
-{
-	remove_proc_entry(afinfo->name, net->proc_net);
-}
-EXPORT_SYMBOL(tcp_proc_unregister);
+EXPORT_SYMBOL(tcp_seq_stop);
 
 static void get_openreq4(const struct request_sock *req,
 			 struct seq_file *f, int i)
@@ -2377,6 +2343,19 @@ static int tcp4_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
+static const struct seq_operations tcp4_seq_ops = {
+	.show		= tcp4_seq_show,
+	.start		= tcp_seq_start,
+	.next		= tcp_seq_next,
+	.stop		= tcp_seq_stop,
+};
+
+static int tcp_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &tcp4_seq_ops,
+			  sizeof(struct tcp_iter_state));
+}
+
 static const struct file_operations tcp_afinfo_seq_fops = {
 	.open    = tcp_seq_open,
 	.read    = seq_read,
@@ -2385,22 +2364,20 @@ static const struct file_operations tcp_afinfo_seq_fops = {
 };
 
 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
-	.name		= "tcp",
 	.family		= AF_INET,
-	.seq_fops	= &tcp_afinfo_seq_fops,
-	.seq_ops	= {
-		.show		= tcp4_seq_show,
-	},
 };
 
 static int __net_init tcp4_proc_init_net(struct net *net)
 {
-	return tcp_proc_register(net, &tcp4_seq_afinfo);
+	if (!proc_create_data("tcp", 0444, net->proc_net,
+			&tcp_afinfo_seq_fops, &tcp4_seq_afinfo))
+		return -ENOMEM;
+	return 0;
 }
 
 static void __net_exit tcp4_proc_exit_net(struct net *net)
 {
-	tcp_proc_unregister(net, &tcp4_seq_afinfo);
+	remove_proc_entry("tcp", net->proc_net);
 }
 
 static struct pernet_operations tcp4_net_ops = {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6d664d83cd16..c0329bb1692f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1909,30 +1909,41 @@ static int tcp6_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
+static const struct seq_operations tcp6_seq_ops = {
+	.show		= tcp6_seq_show,
+	.start		= tcp_seq_start,
+	.next		= tcp_seq_next,
+	.stop		= tcp_seq_stop,
+};
+
+static int tcp6_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &tcp6_seq_ops,
+			  sizeof(struct tcp_iter_state));
+}
+
 static const struct file_operations tcp6_afinfo_seq_fops = {
-	.open    = tcp_seq_open,
+	.open    = tcp6_seq_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
 	.release = seq_release_net
 };
 
 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
-	.name		= "tcp6",
 	.family		= AF_INET6,
-	.seq_fops	= &tcp6_afinfo_seq_fops,
-	.seq_ops	= {
-		.show		= tcp6_seq_show,
-	},
 };
 
 int __net_init tcp6_proc_init(struct net *net)
 {
-	return tcp_proc_register(net, &tcp6_seq_afinfo);
+	if (!proc_create_data("tcp6", 0444, net->proc_net,
+			&tcp6_afinfo_seq_fops, &tcp6_seq_afinfo))
+		return -ENOMEM;
+	return 0;
 }
 
 void tcp6_proc_exit(struct net *net)
 {
-	tcp_proc_unregister(net, &tcp6_seq_afinfo);
+	remove_proc_entry("tcp6", net->proc_net);
 }
 #endif
 
-- 
2.17.0


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot

^ permalink raw reply related

* [PATCH 07/39] ipv{4, 6}/ping: simplify proc file creation
From: Christoph Hellwig @ 2018-04-19 12:41 UTC (permalink / raw)
  To: Andrew Morton, Alexander Viro
  Cc: linux-rtc, Alessandro Zummo, Alexandre Belloni, devel,
	linux-kernel, linux-scsi, Corey Minyard, linux-ide,
	Greg Kroah-Hartman, jfs-discussion, linux-afs, linux-acpi, netdev,
	netfilter-devel, Jiri Slaby, linux-ext4, Alexey Dobriyan,
	megaraidlinux.pdl, drbd-dev
In-Reply-To: <20180419124140.9309-1-hch@lst.de>

Remove the pointless ping_seq_afinfo indirection and make the code look
like most other protocols.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/net/ping.h | 11 ----------
 net/ipv4/ping.c    | 50 +++++++++++++---------------------------------
 net/ipv6/ping.c    | 35 +++++++++++++++++++++-----------
 3 files changed, 37 insertions(+), 59 deletions(-)

diff --git a/include/net/ping.h b/include/net/ping.h
index 4cd90d6b5c25..fd080e043a6e 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -83,20 +83,9 @@ int  ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 bool ping_rcv(struct sk_buff *skb);
 
 #ifdef CONFIG_PROC_FS
-struct ping_seq_afinfo {
-	char				*name;
-	sa_family_t			family;
-	const struct file_operations	*seq_fops;
-	const struct seq_operations	seq_ops;
-};
-
-extern const struct file_operations ping_seq_fops;
-
 void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family);
 void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos);
 void ping_seq_stop(struct seq_file *seq, void *v);
-int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo);
-void ping_proc_unregister(struct net *net, struct ping_seq_afinfo *afinfo);
 
 int __init ping_proc_init(void);
 void ping_proc_exit(void);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 05e47d777009..83170ebf5dfc 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -1147,58 +1147,36 @@ static int ping_v4_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static int ping_seq_open(struct inode *inode, struct file *file)
+static const struct seq_operations ping_v4_seq_ops = {
+	.start		= ping_v4_seq_start,
+	.show		= ping_v4_seq_show,
+	.next		= ping_seq_next,
+	.stop		= ping_seq_stop,
+};
+
+static int ping_v4_seq_open(struct inode *inode, struct file *file)
 {
-	struct ping_seq_afinfo *afinfo = PDE_DATA(inode);
-	return seq_open_net(inode, file, &afinfo->seq_ops,
+	return seq_open_net(inode, file, &ping_v4_seq_ops,
 			   sizeof(struct ping_iter_state));
 }
 
-const struct file_operations ping_seq_fops = {
-	.open		= ping_seq_open,
+const struct file_operations ping_v4_seq_fops = {
+	.open		= ping_v4_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
 	.release	= seq_release_net,
 };
-EXPORT_SYMBOL_GPL(ping_seq_fops);
-
-static struct ping_seq_afinfo ping_v4_seq_afinfo = {
-	.name		= "icmp",
-	.family		= AF_INET,
-	.seq_fops	= &ping_seq_fops,
-	.seq_ops	= {
-		.start		= ping_v4_seq_start,
-		.show		= ping_v4_seq_show,
-		.next		= ping_seq_next,
-		.stop		= ping_seq_stop,
-	},
-};
 
-int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo)
+static int __net_init ping_v4_proc_init_net(struct net *net)
 {
-	struct proc_dir_entry *p;
-	p = proc_create_data(afinfo->name, 0444, net->proc_net,
-			     afinfo->seq_fops, afinfo);
-	if (!p)
+	if (!proc_create("icmp", 0444, net->proc_net, &ping_v4_seq_fops))
 		return -ENOMEM;
 	return 0;
 }
-EXPORT_SYMBOL_GPL(ping_proc_register);
-
-void ping_proc_unregister(struct net *net, struct ping_seq_afinfo *afinfo)
-{
-	remove_proc_entry(afinfo->name, net->proc_net);
-}
-EXPORT_SYMBOL_GPL(ping_proc_unregister);
-
-static int __net_init ping_v4_proc_init_net(struct net *net)
-{
-	return ping_proc_register(net, &ping_v4_seq_afinfo);
-}
 
 static void __net_exit ping_v4_proc_exit_net(struct net *net)
 {
-	ping_proc_unregister(net, &ping_v4_seq_afinfo);
+	remove_proc_entry("icmp", net->proc_net);
 }
 
 static struct pernet_operations ping_v4_net_ops = {
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 746eeae7f581..45d5c8e0f2bf 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -24,6 +24,7 @@
 #include <net/protocol.h>
 #include <net/udp.h>
 #include <net/transp_v6.h>
+#include <linux/proc_fs.h>
 #include <net/ping.h>
 
 /* Compatibility glue so we can support IPv6 when it's compiled as a module */
@@ -215,26 +216,36 @@ static int ping_v6_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct ping_seq_afinfo ping_v6_seq_afinfo = {
-	.name		= "icmp6",
-	.family		= AF_INET6,
-	.seq_fops       = &ping_seq_fops,
-	.seq_ops	= {
-		.start		= ping_v6_seq_start,
-		.show		= ping_v6_seq_show,
-		.next		= ping_seq_next,
-		.stop		= ping_seq_stop,
-	},
+static const struct seq_operations ping_v6_seq_ops = {
+	.start		= ping_v6_seq_start,
+	.show		= ping_v6_seq_show,
+	.next		= ping_seq_next,
+	.stop		= ping_seq_stop,
+};
+
+static int ping_v6_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &ping_v6_seq_ops,
+			   sizeof(struct ping_iter_state));
+}
+
+const struct file_operations ping_v6_seq_fops = {
+	.open		= ping_v6_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_net,
 };
 
 static int __net_init ping_v6_proc_init_net(struct net *net)
 {
-	return ping_proc_register(net, &ping_v6_seq_afinfo);
+	if (!proc_create("icmp6", 0444, net->proc_net, &ping_v6_seq_fops))
+		return -ENOMEM;
+	return 0;
 }
 
 static void __net_init ping_v6_proc_exit_net(struct net *net)
 {
-	return ping_proc_unregister(net, &ping_v6_seq_afinfo);
+	remove_proc_entry("icmp6", net->proc_net);
 }
 
 static struct pernet_operations ping_v6_net_ops = {
-- 
2.17.0


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox