All of lore.kernel.org
 help / color / mirror / Atom feed
diff for duplicates of <1328000796.20090116144156@emcraft.com>

diff --git a/a/1.txt b/N1/1.txt
index b86cd86..8f2fbc9 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -1,6 +1,4 @@
-
-
- Hello Dan,
+=0D=0A Hello Dan,
 
  Thanks for the review. Some comments below.
 
@@ -12,81 +10,90 @@ On Thursday, January 15, 2009 you wrote:
 >> + * do_async_pq - asynchronously calculate P and/or Q
 >> + */
 >> +static struct dma_async_tx_descriptor *
->> +do_async_pq(struct dma_chan *chan, struct page **blocks, unsigned char *scfs,
->> +       unsigned int offset, int src_cnt, size_t len, enum async_tx_flags flags,
+>> +do_async_pq(struct dma_chan *chan, struct page **blocks, unsigned char =
+*scfs,
+>> +       unsigned int offset, int src_cnt, size_t len, enum async_tx_flag=
+s flags,
 >> +       struct dma_async_tx_descriptor *depend_tx,
 >> +       dma_async_tx_callback cb_fn, void *cb_param)
 >> +{
->> +       struct dma_device *dma = chan->device;
+>> +       struct dma_device *dma =3D chan->device;
 >> +       dma_addr_t dma_dest[2], dma_src[src_cnt];
->> +       struct dma_async_tx_descriptor *tx = NULL;
+>> +       struct dma_async_tx_descriptor *tx =3D NULL;
 >> +       dma_async_tx_callback _cb_fn;
 >> +       void *_cb_param;
->> +       unsigned char *scf = NULL;
->> +       int i, src_off = 0;
+>> +       unsigned char *scf =3D NULL;
+>> +       int i, src_off =3D 0;
 >> +       unsigned short pq_src_cnt;
 >> +       enum async_tx_flags async_flags;
->> +       enum dma_ctrl_flags dma_flags = 0;
+>> +       enum dma_ctrl_flags dma_flags =3D 0;
 >> +
 >> +       /*  If we won't handle src_cnt in one shot, then the following
 >> +        * flag(s) will be set only on the first pass of prep_dma
 >> +        */
 >> +       if (flags & ASYNC_TX_PQ_ZERO_P)
->> +               dma_flags |= DMA_PREP_ZERO_P;
+>> +               dma_flags |=3D DMA_PREP_ZERO_P;
 >> +       if (flags & ASYNC_TX_PQ_ZERO_Q)
->> +               dma_flags |= DMA_PREP_ZERO_Q;
+>> +               dma_flags |=3D DMA_PREP_ZERO_Q;
 >> +
->> +       /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */
+>> +       /* DMAs use destinations as sources, so use BIDIRECTIONAL mappin=
+g */
 >> +       if (blocks[src_cnt]) {
->> +               dma_dest[0] = dma_map_page(dma->dev, blocks[src_cnt],
->> +                                          offset, len, DMA_BIDIRECTIONAL);
->> +               dma_flags |= DMA_PREP_HAVE_P;
+>> +               dma_dest[0] =3D dma_map_page(dma->dev, blocks[src_cnt],
+>> +                                          offset, len, DMA_BIDIRECTIONA=
+L);
+>> +               dma_flags |=3D DMA_PREP_HAVE_P;
 >> +       }
 >> +       if (blocks[src_cnt+1]) {
->> +               dma_dest[1] = dma_map_page(dma->dev, blocks[src_cnt+1],
->> +                                          offset, len, DMA_BIDIRECTIONAL);
->> +               dma_flags |= DMA_PREP_HAVE_Q;
+>> +               dma_dest[1] =3D dma_map_page(dma->dev, blocks[src_cnt+1],
+>> +                                          offset, len, DMA_BIDIRECTIONA=
+L);
+>> +               dma_flags |=3D DMA_PREP_HAVE_Q;
 >> +       }
 >> +
->> +       for (i = 0; i < src_cnt; i++)
->> +               dma_src[i] = dma_map_page(dma->dev, blocks[i],
+>> +       for (i =3D 0; i < src_cnt; i++)
+>> +               dma_src[i] =3D dma_map_page(dma->dev, blocks[i],
 >> +                                         offset, len, DMA_TO_DEVICE);
 >> +
 >> +       while (src_cnt) {
->> +               async_flags = flags;
->> +               pq_src_cnt = min(src_cnt, (int)dma->max_pq);
->> +               /* if we are submitting additional pqs, leave the chain open,
->> +                * clear the callback parameters, and leave the destination
+>> +               async_flags =3D flags;
+>> +               pq_src_cnt =3D min(src_cnt, (int)dma->max_pq);
+>> +               /* if we are submitting additional pqs, leave the chain =
+open,
+>> +                * clear the callback parameters, and leave the destinat=
+ion
 >> +                * buffers mapped
 >> +                */
 >> +               if (src_cnt > pq_src_cnt) {
->> +                       async_flags &= ~ASYNC_TX_ACK;
->> +                       dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
->> +                       _cb_fn = NULL;
->> +                       _cb_param = NULL;
+>> +                       async_flags &=3D ~ASYNC_TX_ACK;
+>> +                       dma_flags |=3D DMA_COMPL_SKIP_DEST_UNMAP;
+>> +                       _cb_fn =3D NULL;
+>> +                       _cb_param =3D NULL;
 >> +               } else {
->> +                       _cb_fn = cb_fn;
->> +                       _cb_param = cb_param;
+>> +                       _cb_fn =3D cb_fn;
+>> +                       _cb_param =3D cb_param;
 >> +               }
 >> +               if (_cb_fn)
->> +                       dma_flags |= DMA_PREP_INTERRUPT;
+>> +                       dma_flags |=3D DMA_PREP_INTERRUPT;
 >> +               if (scfs)
->> +                       scf = &scfs[src_off];
+>> +                       scf =3D &scfs[src_off];
 >> +
 >> +               /* Since we have clobbered the src_list we are committed
 >> +                * to doing this asynchronously.  Drivers force forward
 >> +                * progress in case they can not provide a descriptor
 >> +                */
->> +               tx = dma->device_prep_dma_pq(chan, dma_dest,
->> +                                            &dma_src[src_off], pq_src_cnt,
+>> +               tx =3D dma->device_prep_dma_pq(chan, dma_dest,
+>> +                                            &dma_src[src_off], pq_src_c=
+nt,
 >> +                                            scf, len, dma_flags);
 >> +               if (unlikely(!tx))
 >> +                       async_tx_quiesce(&depend_tx);
 >> +
->> +               /* spin wait for the preceeding transactions to complete */
+>> +               /* spin wait for the preceeding transactions to complete=
+ */
 >> +               while (unlikely(!tx)) {
 >> +                       dma_async_issue_pending(chan);
->> +                       tx = dma->device_prep_dma_pq(chan, dma_dest,
+>> +                       tx =3D dma->device_prep_dma_pq(chan, dma_dest,
 >> +                                       &dma_src[src_off], pq_src_cnt,
 >> +                                       scf, len, dma_flags);
 >> +               }
@@ -94,20 +101,21 @@ On Thursday, January 15, 2009 you wrote:
 >> +               async_tx_submit(chan, tx, async_flags, depend_tx,
 >> +                               _cb_fn, _cb_param);
 >> +
->> +               depend_tx = tx;
->> +               flags |= ASYNC_TX_DEP_ACK;
+>> +               depend_tx =3D tx;
+>> +               flags |=3D ASYNC_TX_DEP_ACK;
 >> +
 >> +               if (src_cnt > pq_src_cnt) {
 >> +                       /* drop completed sources */
->> +                       src_cnt -= pq_src_cnt;
->> +                       src_off += pq_src_cnt;
+>> +                       src_cnt -=3D pq_src_cnt;
+>> +                       src_off +=3D pq_src_cnt;
 >> +
 >> +                       /* use the intermediate result as a source; we
 >> +                        * clear DMA_PREP_ZERO, so prep_dma_pq will
 >> +                        * include destination(s) into calculations. Thus
 >> +                        * keep DMA_PREP_HAVE_x in dma_flags only
 >> +                        */
->> +                       dma_flags &= (DMA_PREP_HAVE_P | DMA_PREP_HAVE_Q);
+>> +                       dma_flags &=3D (DMA_PREP_HAVE_P | DMA_PREP_HAVE_=
+Q);
 
 > I don't think this will work as we will be mixing Q into the new P and
 > P into the new Q.  In order to support (src_cnt > device->max_pq) we
@@ -115,84 +123,88 @@ On Thursday, January 15, 2009 you wrote:
 > continued (DMA_PREP_CONTINUE) and to apply different coefficients to
 > P and Q to cancel the effect of including them as sources.
 
- With DMA_PREP_ZERO_P/Q approach, the Q isn't mixed into new P, and P 
-isn't mixed into new Q. For your example of max_pq=4:
+ With DMA_PREP_ZERO_P/Q approach, the Q isn't mixed into new P, and P=20
+isn't mixed into new Q. For your example of max_pq=3D4:
 
- p, q = PQ(src0, src1, src2, src3, src4, COEF({01}, {02}, {04}, {08}, {10}))
+ p, q =3D PQ(src0, src1, src2, src3, src4, COEF({01}, {02}, {04}, {08}, {10=
+}))
 
  with the current implementation will be split into:
 
- p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08})
- p`,q` = PQ(src4, COEF({10}))
+ p, q =3D PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08})
+ p`,q` =3D PQ(src4, COEF({10}))
 
  which will result in the following:
 
- p = ((dma_flags & DMA_PREP_ZERO_P) ? 0 : old_p) + src0 + src1 + src2 + src3
- q = ((dma_flags & DMA_PREP_ZERO_Q) ? 0 : old_q) + {01}*src0 + {02}*src1 + {04}*src2 + {08}*src3
- 
- p` = p + src4
- q` = q + {10}*src4
+ p =3D ((dma_flags & DMA_PREP_ZERO_P) ? 0 : old_p) + src0 + src1 + src2 + s=
+rc3
+ q =3D ((dma_flags & DMA_PREP_ZERO_Q) ? 0 : old_q) + {01}*src0 + {02}*src1 =
++ {04}*src2 + {08}*src3
+=20
+ p` =3D p + src4
+ q` =3D q + {10}*src4
 
- But, if we get rid of DMA_PREP_ZERO_P/Q, then the mess with P/Q will 
+ But, if we get rid of DMA_PREP_ZERO_P/Q, then the mess with P/Q will=20
 have a place indeed.
 
 >  Here is an
-> example of supporting a 5 source pq operation where max_pq == 4 (the
+> example of supporting a 5 source pq operation where max_pq =3D=3D 4 (the
 > minimum).
 
->     p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08}))
->     p', q' = PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10}))
+>     p, q =3D PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08}))
+>     p', q' =3D PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10}))
 
->     p' = p + q + q + src4 = p + src4 = P
->     q' = {00}*p + {01}*q + {00}*q + {10}*src4 = q + {10)*src4 = Q
+>     p' =3D p + q + q + src4 =3D p + src4 =3D P
+>     q' =3D {00}*p + {01}*q + {00}*q + {10}*src4 =3D q + {10)*src4 =3D Q
 
 > ...at no point do we need to zero P or Q.  Yes, this requires a lot of
 > extra work for incremental sources,
 
- I would say, that 'very very lot'. In general this means that for 
+ I would say, that 'very very lot'. In general this means that for=20
 the cases of N sources > max_pq we'll have to do:
 
- C = 1 + ceil((N-max_pq)/(max_pq - 3)) number of calls to ADMA.
+ C =3D 1 + ceil((N-max_pq)/(max_pq - 3)) number of calls to ADMA.
 
- E.g., for max_pq = 4:
+ E.g., for max_pq =3D 4:
 
- N = 5 => C = 2,
- N = 6 => C = 3,
+ N =3D 5 =3D> C =3D 2,
+ N =3D 6 =3D> C =3D 3,
  ..
- N = 15 => C = 12,
- N = 16 => C = 13,
+ N =3D 15 =3D> C =3D 12,
+ N =3D 16 =3D> C =3D 13,
  ..
- N = 128 => C = 125.
+ N =3D 128 =3D> C =3D 125.
 
 
  If we stay with the current approach of using DMA_PREP_ZERO_P/Q, then
 
- C = 1 + ceil((N-max_pq)/max_pq)) number of calls to ADMA.
+ C =3D 1 + ceil((N-max_pq)/max_pq)) number of calls to ADMA.
 
  And the same series will result in:
 
- N = 5 => C = 2,
- N = 6 => C = 2,
+ N =3D 5 =3D> C =3D 2,
+ N =3D 6 =3D> C =3D 2,
  ..
- N = 15 => C = 4,
- N = 16 => C = 4,
+ N =3D 15 =3D> C =3D 4,
+ N =3D 16 =3D> C =3D 4,
  ..
- N = 128 => C = 32.
+ N =3D 128 =3D> C =3D 32.
 
 
- I'm afraid that the difference (13/4, 125/32) is very significant, so 
-getting rid of DMA_PREP_ZERO_P/Q will eat most of the improvement 
+ I'm afraid that the difference (13/4, 125/32) is very significant, so=20
+getting rid of DMA_PREP_ZERO_P/Q will eat most of the improvement=20
 which could be achieved with the current approach.
 
->  but at this point I do not see a cleaner alternatve for engines like iop13xx.
+>  but at this point I do not see a cleaner alternatve for engines like iop=
+13xx.
 
- I can't find any description of iop13xx processors at Intel's 
+ I can't find any description of iop13xx processors at Intel's=20
 web-site, only 3xx:
 
-http://www.intel.com/design/iio/index.htm?iid=ipp_embed+embed_io
+http://www.intel.com/design/iio/index.htm?iid=3Dipp_embed+embed_io
 
- So, it's hard for me to do any suggestions. I just wonder - doesn't 
-iop13xx allow users to program destination addresses into the sources 
+ So, it's hard for me to do any suggestions. I just wonder - doesn't=20
+iop13xx allow users to program destination addresses into the sources=20
 fields of descriptors?
 
 >> +               } else
@@ -206,19 +218,22 @@ fields of descriptors?
 >> + * do_sync_pq - synchronously calculate P and Q
 >> + */
 >> +static void
->> +do_sync_pq(struct page **blocks, unsigned char *scfs, unsigned int offset,
+>> +do_sync_pq(struct page **blocks, unsigned char *scfs, unsigned int offs=
+et,
 >> +       int src_cnt, size_t len, enum async_tx_flags flags,
 >> +       struct dma_async_tx_descriptor *depend_tx,
 >> +       dma_async_tx_callback cb_fn, void *cb_param)
 >> +{
 >> +       int i, pos;
->> +       uint8_t *p = NULL, *q = NULL, *src;
+>> +       uint8_t *p =3D NULL, *q =3D NULL, *src;
 >> +
 >> +       /* set destination addresses */
 >> +       if (blocks[src_cnt])
->> +               p = (uint8_t *)(page_address(blocks[src_cnt]) + offset);
+>> +               p =3D (uint8_t *)(page_address(blocks[src_cnt]) + offset=
+);
 >> +       if (blocks[src_cnt+1])
->> +               q = (uint8_t *)(page_address(blocks[src_cnt+1]) + offset);
+>> +               q =3D (uint8_t *)(page_address(blocks[src_cnt+1]) + offs=
+et);
 >> +
 >> +       if (flags & ASYNC_TX_PQ_ZERO_P) {
 >> +               BUG_ON(!p);
@@ -230,13 +245,14 @@ fields of descriptors?
 >> +               memset(q, 0, len);
 >> +       }
 >> +
->> +       for (i = 0; i < src_cnt; i++) {
->> +               src = (uint8_t *)(page_address(blocks[i]) + offset);
->> +               for (pos = 0; pos < len; pos++) {
+>> +       for (i =3D 0; i < src_cnt; i++) {
+>> +               src =3D (uint8_t *)(page_address(blocks[i]) + offset);
+>> +               for (pos =3D 0; pos < len; pos++) {
 >> +                       if (p)
->> +                               p[pos] ^= src[pos];
+>> +                               p[pos] ^=3D src[pos];
 >> +                       if (q)
->> +                               q[pos] ^= raid6_gfmul[scfs[i]][src[pos]];
+>> +                               q[pos] ^=3D raid6_gfmul[scfs[i]][src[pos=
+]];
 >> +               }
 >> +       }
 >> +       async_tx_sync_epilog(cb_fn, cb_param);
@@ -246,41 +262,45 @@ fields of descriptors?
 > contents of p and q, just regenerate from the current sources.  This
 > kills another site where ASYNC_TX_PQ_ZERO_{P,Q} is used.
 
- Well, perhaps you are right. The ASYNC_TX_PQ_ZERO_{P,Q} is set for 
-the most common cases of using async_pq, i.e. the parity generating. 
-The wrap-around async_gen_syndrome() function always set these flags 
+ Well, perhaps you are right. The ASYNC_TX_PQ_ZERO_{P,Q} is set for=20
+the most common cases of using async_pq, i.e. the parity generating.=20
+The wrap-around async_gen_syndrome() function always set these flags=20
 before calling async_pq().
 
  The cases where ASYNC_TX_PQ_ZERO_{P,Q} isn't set are:
 
-(a) async_pq can't process the sources in one short because of src_cnt > 
-max_pq, so it should re-use the intermediate results (destination) as 
+(a) async_pq can't process the sources in one short because of src_cnt >=20
+max_pq, so it should re-use the intermediate results (destination) as=20
 the sources;
 
-(b) async_r6_dd_recov() does XOR with async_pq() assuming re-using the 
+(b) async_r6_dd_recov() does XOR with async_pq() assuming re-using the=20
 destination as the source.
 
 
- So, I would say that ASYNC_TX_PQ_ZERO_{P,Q} should definitely go 
-away, if there were no significant overheads in (a) implemented 
+ So, I would say that ASYNC_TX_PQ_ZERO_{P,Q} should definitely go=20
+away, if there were no significant overheads in (a) implemented=20
 without these flags (see above).
 
 >> +
 >> +/**
->> + * async_pq - attempt to do XOR and Galois calculations in parallel using
+>> + * async_pq - attempt to do XOR and Galois calculations in parallel usi=
+ng
 >> + *     a dma engine.
->> + * @blocks: source block array from 0 to (src_cnt-1) with the p destination
+>> + * @blocks: source block array from 0 to (src_cnt-1) with the p destina=
+tion
 >> + *     at blocks[src_cnt] and q at blocks[src_cnt + 1]. Only one of two
 >> + *     destinations may be present (another then has to be set to NULL).
 >> + *     By default, the result of calculations is XOR-ed with the initial
 >> + *     content of the destinationa buffers. Use ASYNC_TX_PQ_ZERO_x flags
 >> + *     to avoid this.
->> + *     NOTE: client code must assume the contents of this array are destroyed
+>> + *     NOTE: client code must assume the contents of this array are des=
+troyed
 >> + * @scfs: array of source coefficients used in GF-multiplication
 >> + * @offset: offset in pages to start transaction
 >> + * @src_cnt: number of source pages
 >> + * @len: length in bytes
->> + * @flags: ASYNC_TX_PQ_ZERO_P, ASYNC_TX_PQ_ZERO_Q, ASYNC_TX_ASSUME_COHERENT,
+>> + * @flags: ASYNC_TX_PQ_ZERO_P, ASYNC_TX_PQ_ZERO_Q, ASYNC_TX_ASSUME_COHE=
+RENT,
 >> + *     ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, ASYNC_TX_ASYNC_ONLY
 >> + * @depend_tx: depends on the result of this transaction.
 >> + * @cb_fn: function to call when the operation completes
@@ -292,52 +312,59 @@ without these flags (see above).
 >> +       struct dma_async_tx_descriptor *depend_tx,
 >> +       dma_async_tx_callback cb_fn, void *cb_param)
 >> +{
->> +       struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_PQ,
+>> +       struct dma_chan *chan =3D async_tx_find_channel(depend_tx, DMA_P=
+Q,
 >> +                                       &blocks[src_cnt], 2,
 >> +                                       blocks, src_cnt, len);
->> +       struct dma_device *device = chan ? chan->device : NULL;
->> +       struct dma_async_tx_descriptor *tx = NULL;
+>> +       struct dma_device *device =3D chan ? chan->device : NULL;
+>> +       struct dma_async_tx_descriptor *tx =3D NULL;
 >> +
 >> +       if (!device && (flags & ASYNC_TX_ASYNC_ONLY))
 >> +               return NULL;
 >> +
 >> +       if (device) {
 >> +               /* run pq asynchronously */
->> +               tx = do_async_pq(chan, blocks, scfs, offset, src_cnt,
+>> +               tx =3D do_async_pq(chan, blocks, scfs, offset, src_cnt,
 >> +                       len, flags, depend_tx, cb_fn,cb_param);
 >> +       } else {
 >> +               /* run pq synchronously */
 >> +               if (!blocks[src_cnt+1]) {
->> +                       struct page *pdst = blocks[src_cnt];
+>> +                       struct page *pdst =3D blocks[src_cnt];
 >> +                       int i;
 >> +
 >> +                       /* Calculate P-parity only.
 >> +                        * As opposite to async_xor(), async_pq() assumes
->> +                        * that destinations are included into calculations,
+>> +                        * that destinations are included into calculati=
+ons,
 >> +                        * so we should re-arrange the xor src list to
 >> +                        * achieve the similar behavior.
 >> +                        */
 >> +                       if (!(flags & ASYNC_TX_PQ_ZERO_P)) {
->> +                               /* If async_pq() user doesn't set ZERO flag,
+>> +                               /* If async_pq() user doesn't set ZERO f=
+lag,
 >> +                                * it's assumed that destination has some
->> +                                * reasonable data to include in calculations.
->> +                                * The destination must be at position 0, so
+>> +                                * reasonable data to include in calcula=
+tions.
+>> +                                * The destination must be at position 0=
+, so
 >> +                                * shift the sources and put pdst at the
 >> +                                * beginning of the list.
 >> +                                */
->> +                               for (i = src_cnt - 1; i >= 0; i--)
->> +                                       blocks[i+1] = blocks[i];
->> +                               blocks[0] = pdst;
+>> +                               for (i =3D src_cnt - 1; i >=3D 0; i--)
+>> +                                       blocks[i+1] =3D blocks[i];
+>> +                               blocks[0] =3D pdst;
 >> +                               src_cnt++;
->> +                               flags |= ASYNC_TX_XOR_DROP_DST;
+>> +                               flags |=3D ASYNC_TX_XOR_DROP_DST;
 >> +                       } else {
->> +                               /* If async_pq() user want to clear P, then
->> +                                * this will be done automatically in async
+>> +                               /* If async_pq() user want to clear P, t=
+hen
+>> +                                * this will be done automatically in as=
+ync
 >> +                                * case, and with the help of ZERO_DST in
 >> +                                * the sync one.
 >> +                                */
->> +                               flags &= ~ASYNC_TX_PQ_ZERO_P;
->> +                               flags |= ASYNC_TX_XOR_ZERO_DST;
+>> +                               flags &=3D ~ASYNC_TX_PQ_ZERO_P;
+>> +                               flags |=3D ASYNC_TX_XOR_ZERO_DST;
 >> +                       }
 >> +
 >> +                       return async_xor(pdst, blocks, offset,
@@ -356,8 +383,10 @@ without these flags (see above).
 
 >> @@ -81,14 +81,28 @@ enum dma_transaction_type {
 >>  *     dependency chains
->>  * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
->>  * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
+>>  * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source b=
+uffer(s)
+>>  * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destina=
+tion(s)
 >> + * @DMA_PREP_HAVE_P - set if the destination list includes the correct
 >> + *     address of P (P-parity should be handled)
 >> + * @DMA_PREP_HAVE_Q - set if the destination list includes the correct
@@ -366,15 +395,15 @@ without these flags (see above).
 >> + * @DMA_PREP_ZERO_Q - set if Q has to be zeroed before proceeding
 >>  */
 >>  enum dma_ctrl_flags {
->>        DMA_PREP_INTERRUPT = (1 << 0),
->>        DMA_CTRL_ACK = (1 << 1),
->>        DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2),
->>        DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
+>>        DMA_PREP_INTERRUPT =3D (1 << 0),
+>>        DMA_CTRL_ACK =3D (1 << 1),
+>>        DMA_COMPL_SKIP_SRC_UNMAP =3D (1 << 2),
+>>        DMA_COMPL_SKIP_DEST_UNMAP =3D (1 << 3),
 >> +
->> +       DMA_PREP_HAVE_P = (1 << 4),
->> +       DMA_PREP_HAVE_Q = (1 << 5),
->> +       DMA_PREP_ZERO_P = (1 << 6),
->> +       DMA_PREP_ZERO_Q = (1 << 7),
+>> +       DMA_PREP_HAVE_P =3D (1 << 4),
+>> +       DMA_PREP_HAVE_Q =3D (1 << 5),
+>> +       DMA_PREP_ZERO_P =3D (1 << 6),
+>> +       DMA_PREP_ZERO_Q =3D (1 << 7),
 >>  };
 >>
 >> +#define DMA_PCHECK_FAILED      (1 << 0)
diff --git a/a/content_digest b/N1/content_digest
index 47176eb..51ff5b4 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -6,14 +6,12 @@
  "To\0Dan Williams <dan.j.williams@intel.com>\0"
  "Cc\0linux-raid@vger.kernel.org"
   linuxppc-dev@ozlabs.org
-  dzu@denx.de
   wd@denx.de
+  dzu@denx.de
  " yanok@emcraft.com\0"
  "\00:1\0"
  "b\0"
- "\n"
- "\n"
- " Hello Dan,\n"
+ "=0D=0A Hello Dan,\n"
  "\n"
  " Thanks for review. Some comments below.\n"
  "\n"
@@ -25,81 +23,90 @@
  ">> + * do_async_pq - asynchronously calculate P and/or Q\n"
  ">> + */\n"
  ">> +static struct dma_async_tx_descriptor *\n"
- ">> +do_async_pq(struct dma_chan *chan, struct page **blocks, unsigned char *scfs,\n"
- ">> +       unsigned int offset, int src_cnt, size_t len, enum async_tx_flags flags,\n"
+ ">> +do_async_pq(struct dma_chan *chan, struct page **blocks, unsigned char =\n"
+ "*scfs,\n"
+ ">> +       unsigned int offset, int src_cnt, size_t len, enum async_tx_flag=\n"
+ "s flags,\n"
  ">> +       struct dma_async_tx_descriptor *depend_tx,\n"
  ">> +       dma_async_tx_callback cb_fn, void *cb_param)\n"
  ">> +{\n"
- ">> +       struct dma_device *dma = chan->device;\n"
+ ">> +       struct dma_device *dma =3D chan->device;\n"
  ">> +       dma_addr_t dma_dest[2], dma_src[src_cnt];\n"
- ">> +       struct dma_async_tx_descriptor *tx = NULL;\n"
+ ">> +       struct dma_async_tx_descriptor *tx =3D NULL;\n"
  ">> +       dma_async_tx_callback _cb_fn;\n"
  ">> +       void *_cb_param;\n"
- ">> +       unsigned char *scf = NULL;\n"
- ">> +       int i, src_off = 0;\n"
+ ">> +       unsigned char *scf =3D NULL;\n"
+ ">> +       int i, src_off =3D 0;\n"
  ">> +       unsigned short pq_src_cnt;\n"
  ">> +       enum async_tx_flags async_flags;\n"
- ">> +       enum dma_ctrl_flags dma_flags = 0;\n"
+ ">> +       enum dma_ctrl_flags dma_flags =3D 0;\n"
  ">> +\n"
  ">> +       /*  If we won't handle src_cnt in one shot, then the following\n"
  ">> +        * flag(s) will be set only on the first pass of prep_dma\n"
  ">> +        */\n"
  ">> +       if (flags & ASYNC_TX_PQ_ZERO_P)\n"
- ">> +               dma_flags |= DMA_PREP_ZERO_P;\n"
+ ">> +               dma_flags |=3D DMA_PREP_ZERO_P;\n"
  ">> +       if (flags & ASYNC_TX_PQ_ZERO_Q)\n"
- ">> +               dma_flags |= DMA_PREP_ZERO_Q;\n"
+ ">> +               dma_flags |=3D DMA_PREP_ZERO_Q;\n"
  ">> +\n"
- ">> +       /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */\n"
+ ">> +       /* DMAs use destinations as sources, so use BIDIRECTIONAL mappin=\n"
+ "g */\n"
  ">> +       if (blocks[src_cnt]) {\n"
- ">> +               dma_dest[0] = dma_map_page(dma->dev, blocks[src_cnt],\n"
- ">> +                                          offset, len, DMA_BIDIRECTIONAL);\n"
- ">> +               dma_flags |= DMA_PREP_HAVE_P;\n"
+ ">> +               dma_dest[0] =3D dma_map_page(dma->dev, blocks[src_cnt],\n"
+ ">> +                                          offset, len, DMA_BIDIRECTIONA=\n"
+ "L);\n"
+ ">> +               dma_flags |=3D DMA_PREP_HAVE_P;\n"
  ">> +       }\n"
  ">> +       if (blocks[src_cnt+1]) {\n"
- ">> +               dma_dest[1] = dma_map_page(dma->dev, blocks[src_cnt+1],\n"
- ">> +                                          offset, len, DMA_BIDIRECTIONAL);\n"
- ">> +               dma_flags |= DMA_PREP_HAVE_Q;\n"
+ ">> +               dma_dest[1] =3D dma_map_page(dma->dev, blocks[src_cnt+1],\n"
+ ">> +                                          offset, len, DMA_BIDIRECTIONA=\n"
+ "L);\n"
+ ">> +               dma_flags |=3D DMA_PREP_HAVE_Q;\n"
  ">> +       }\n"
  ">> +\n"
- ">> +       for (i = 0; i < src_cnt; i++)\n"
- ">> +               dma_src[i] = dma_map_page(dma->dev, blocks[i],\n"
+ ">> +       for (i =3D 0; i < src_cnt; i++)\n"
+ ">> +               dma_src[i] =3D dma_map_page(dma->dev, blocks[i],\n"
  ">> +                                         offset, len, DMA_TO_DEVICE);\n"
  ">> +\n"
  ">> +       while (src_cnt) {\n"
- ">> +               async_flags = flags;\n"
- ">> +               pq_src_cnt = min(src_cnt, (int)dma->max_pq);\n"
- ">> +               /* if we are submitting additional pqs, leave the chain open,\n"
- ">> +                * clear the callback parameters, and leave the destination\n"
+ ">> +               async_flags =3D flags;\n"
+ ">> +               pq_src_cnt =3D min(src_cnt, (int)dma->max_pq);\n"
+ ">> +               /* if we are submitting additional pqs, leave the chain =\n"
+ "open,\n"
+ ">> +                * clear the callback parameters, and leave the destinat=\n"
+ "ion\n"
  ">> +                * buffers mapped\n"
  ">> +                */\n"
  ">> +               if (src_cnt > pq_src_cnt) {\n"
- ">> +                       async_flags &= ~ASYNC_TX_ACK;\n"
- ">> +                       dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;\n"
- ">> +                       _cb_fn = NULL;\n"
- ">> +                       _cb_param = NULL;\n"
+ ">> +                       async_flags &=3D ~ASYNC_TX_ACK;\n"
+ ">> +                       dma_flags |=3D DMA_COMPL_SKIP_DEST_UNMAP;\n"
+ ">> +                       _cb_fn =3D NULL;\n"
+ ">> +                       _cb_param =3D NULL;\n"
  ">> +               } else {\n"
- ">> +                       _cb_fn = cb_fn;\n"
- ">> +                       _cb_param = cb_param;\n"
+ ">> +                       _cb_fn =3D cb_fn;\n"
+ ">> +                       _cb_param =3D cb_param;\n"
  ">> +               }\n"
  ">> +               if (_cb_fn)\n"
- ">> +                       dma_flags |= DMA_PREP_INTERRUPT;\n"
+ ">> +                       dma_flags |=3D DMA_PREP_INTERRUPT;\n"
  ">> +               if (scfs)\n"
- ">> +                       scf = &scfs[src_off];\n"
+ ">> +                       scf =3D &scfs[src_off];\n"
  ">> +\n"
  ">> +               /* Since we have clobbered the src_list we are committed\n"
  ">> +                * to doing this asynchronously.  Drivers force forward\n"
  ">> +                * progress in case they can not provide a descriptor\n"
  ">> +                */\n"
- ">> +               tx = dma->device_prep_dma_pq(chan, dma_dest,\n"
- ">> +                                            &dma_src[src_off], pq_src_cnt,\n"
+ ">> +               tx =3D dma->device_prep_dma_pq(chan, dma_dest,\n"
+ ">> +                                            &dma_src[src_off], pq_src_c=\n"
+ "nt,\n"
  ">> +                                            scf, len, dma_flags);\n"
  ">> +               if (unlikely(!tx))\n"
  ">> +                       async_tx_quiesce(&depend_tx);\n"
  ">> +\n"
- ">> +               /* spin wait for the preceeding transactions to complete */\n"
+ ">> +               /* spin wait for the preceeding transactions to complete=\n"
+ " */\n"
  ">> +               while (unlikely(!tx)) {\n"
  ">> +                       dma_async_issue_pending(chan);\n"
- ">> +                       tx = dma->device_prep_dma_pq(chan, dma_dest,\n"
+ ">> +                       tx =3D dma->device_prep_dma_pq(chan, dma_dest,\n"
  ">> +                                       &dma_src[src_off], pq_src_cnt,\n"
  ">> +                                       scf, len, dma_flags);\n"
  ">> +               }\n"
@@ -107,20 +114,21 @@
  ">> +               async_tx_submit(chan, tx, async_flags, depend_tx,\n"
  ">> +                               _cb_fn, _cb_param);\n"
  ">> +\n"
- ">> +               depend_tx = tx;\n"
- ">> +               flags |= ASYNC_TX_DEP_ACK;\n"
+ ">> +               depend_tx =3D tx;\n"
+ ">> +               flags |=3D ASYNC_TX_DEP_ACK;\n"
  ">> +\n"
  ">> +               if (src_cnt > pq_src_cnt) {\n"
  ">> +                       /* drop completed sources */\n"
- ">> +                       src_cnt -= pq_src_cnt;\n"
- ">> +                       src_off += pq_src_cnt;\n"
+ ">> +                       src_cnt -=3D pq_src_cnt;\n"
+ ">> +                       src_off +=3D pq_src_cnt;\n"
  ">> +\n"
  ">> +                       /* use the intermediate result as a source; we\n"
  ">> +                        * clear DMA_PREP_ZERO, so prep_dma_pq will\n"
  ">> +                        * include destination(s) into calculations. Thus\n"
  ">> +                        * keep DMA_PREP_HAVE_x in dma_flags only\n"
  ">> +                        */\n"
- ">> +                       dma_flags &= (DMA_PREP_HAVE_P | DMA_PREP_HAVE_Q);\n"
+ ">> +                       dma_flags &=3D (DMA_PREP_HAVE_P | DMA_PREP_HAVE_=\n"
+ "Q);\n"
  "\n"
  "> I don't think this will work as we will be mixing Q into the new P and\n"
  "> P into the new Q.  In order to support (src_cnt > device->max_pq) we\n"
@@ -128,84 +136,88 @@
  "> continued (DMA_PREP_CONTINUE) and to apply different coefficients to\n"
  "> P and Q to cancel the effect of including them as sources.\n"
  "\n"
- " With DMA_PREP_ZERO_P/Q approach, the Q isn't mixed into new P, and P \n"
- "isn't mixed into new Q. For your example of max_pq=4:\n"
+ " With DMA_PREP_ZERO_P/Q approach, the Q isn't mixed into new P, and P=20\n"
+ "isn't mixed into new Q. For your example of max_pq=3D4:\n"
  "\n"
- " p, q = PQ(src0, src1, src2, src3, src4, COEF({01}, {02}, {04}, {08}, {10}))\n"
+ " p, q =3D PQ(src0, src1, src2, src3, src4, COEF({01}, {02}, {04}, {08}, {10=\n"
+ "}))\n"
  "\n"
  " with the current implementation will be split into:\n"
  "\n"
- " p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08}))\n"
- " p`,q` = PQ(src4, COEF({10}))\n"
+ " p, q =3D PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08}))\n"
+ " p`,q` =3D PQ(src4, COEF({10}))\n"
  "\n"
  " which will result in the following:\n"
  "\n"
- " p = ((dma_flags & DMA_PREP_ZERO_P) ? 0 : old_p) + src0 + src1 + src2 + src3\n"
- " q = ((dma_flags & DMA_PREP_ZERO_Q) ? 0 : old_q) + {01}*src0 + {02}*src1 + {04}*src2 + {08}*src3\n"
- " \n"
- " p` = p + src4\n"
- " q` = q + {10}*src4\n"
+ " p =3D ((dma_flags & DMA_PREP_ZERO_P) ? 0 : old_p) + src0 + src1 + src2 + s=\n"
+ "rc3\n"
+ " q =3D ((dma_flags & DMA_PREP_ZERO_Q) ? 0 : old_q) + {01}*src0 + {02}*src1 =\n"
+ "+ {04}*src2 + {08}*src3\n"
+ "=20\n"
+ " p` =3D p + src4\n"
+ " q` =3D q + {10}*src4\n"
  "\n"
- " But, if we get rid of DMA_PREP_ZERO_P/Q, then the mess with P/Q will \n"
+ " But, if we get rid of DMA_PREP_ZERO_P/Q, then the mess with P/Q will=20\n"
  "have a place indeed.\n"
  "\n"
  ">  Here is an\n"
- "> example of supporting a 5 source pq operation where max_pq == 4 (the\n"
+ "> example of supporting a 5 source pq operation where max_pq =3D=3D 4 (the\n"
  "> minimum).\n"
  "\n"
- ">     p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08}))\n"
- ">     p', q' = PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10}))\n"
+ ">     p, q =3D PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08}))\n"
+ ">     p', q' =3D PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10}))\n"
  "\n"
- ">     p' = p + q + q + src4 = p + src4 = P\n"
- ">     q' = {00}*p + {01}*q + {00}*q + {10}*src4 = q + {10}*src4 = Q\n"
+ ">     p' =3D p + q + q + src4 =3D p + src4 =3D P\n"
+ ">     q' =3D {00}*p + {01}*q + {00}*q + {10}*src4 =3D q + {10}*src4 =3D Q\n"
  "\n"
  "> ...at no point do we need to zero P or Q.  Yes, this requires a lot of\n"
  "> extra work for incremental sources,\n"
  "\n"
- " I would say, that 'very very lot'. In general this means that for \n"
+ " I would say, that 'very very lot'. In general this means that for=20\n"
  "the cases of N sources > max_pq we'll have to do:\n"
  "\n"
- " C = 1 + ceil((N-max_pq)/(max_pq - 3)) number of calls to ADMA.\n"
+ " C =3D 1 + ceil((N-max_pq)/(max_pq - 3)) number of calls to ADMA.\n"
  "\n"
- " E.g., for max_pq = 4:\n"
+ " E.g., for max_pq =3D 4:\n"
  "\n"
- " N = 5 => C = 2,\n"
- " N = 6 => C = 3,\n"
+ " N =3D 5 =3D> C =3D 2,\n"
+ " N =3D 6 =3D> C =3D 3,\n"
  " ..\n"
- " N = 15 => C = 12,\n"
- " N = 16 => C = 13,\n"
+ " N =3D 15 =3D> C =3D 12,\n"
+ " N =3D 16 =3D> C =3D 13,\n"
  " ..\n"
- " N = 128 => C = 125.\n"
+ " N =3D 128 =3D> C =3D 125.\n"
  "\n"
  "\n"
  " If we stay with the current approach of using DMA_PREP_ZERO_P/Q, then\n"
  "\n"
- " C = 1 + ceil((N-max_pq)/max_pq) number of calls to ADMA.\n"
+ " C =3D 1 + ceil((N-max_pq)/max_pq) number of calls to ADMA.\n"
  "\n"
  " And the same series will result in:\n"
  "\n"
- " N = 5 => C = 2,\n"
- " N = 6 => C = 2,\n"
+ " N =3D 5 =3D> C =3D 2,\n"
+ " N =3D 6 =3D> C =3D 2,\n"
  " ..\n"
- " N = 15 => C = 4,\n"
- " N = 16 => C = 4,\n"
+ " N =3D 15 =3D> C =3D 4,\n"
+ " N =3D 16 =3D> C =3D 4,\n"
  " ..\n"
- " N = 128 => C = 32.\n"
+ " N =3D 128 =3D> C =3D 32.\n"
  "\n"
  "\n"
- " I'm afraid that the difference (13/4, 125/32) is very significant, so \n"
- "getting rid of DMA_PREP_ZERO_P/Q will eat most of the improvement \n"
+ " I'm afraid that the difference (13/4, 125/32) is very significant, so=20\n"
+ "getting rid of DMA_PREP_ZERO_P/Q will eat most of the improvement=20\n"
  "which could be achieved with the current approach.\n"
  "\n"
- ">  but at this point I do not see a cleaner alternatve for engines like iop13xx.\n"
+ ">  but at this point I do not see a cleaner alternatve for engines like iop=\n"
+ "13xx.\n"
  "\n"
- " I can't find any description of iop13xx processors at Intel's \n"
+ " I can't find any description of iop13xx processors at Intel's=20\n"
  "web-site, only 3xx:\n"
  "\n"
- "http://www.intel.com/design/iio/index.htm?iid=ipp_embed+embed_io\n"
+ "http://www.intel.com/design/iio/index.htm?iid=3Dipp_embed+embed_io\n"
  "\n"
- " So, it's hard for me to do any suggestions. I just wonder - doesn't \n"
- "iop13xx allow users to program destination addresses into the sources \n"
+ " So, it's hard for me to do any suggestions. I just wonder - doesn't=20\n"
+ "iop13xx allow users to program destination addresses into the sources=20\n"
  "fields of descriptors?\n"
  "\n"
  ">> +               } else\n"
@@ -219,19 +231,22 @@
  ">> + * do_sync_pq - synchronously calculate P and Q\n"
  ">> + */\n"
  ">> +static void\n"
- ">> +do_sync_pq(struct page **blocks, unsigned char *scfs, unsigned int offset,\n"
+ ">> +do_sync_pq(struct page **blocks, unsigned char *scfs, unsigned int offs=\n"
+ "et,\n"
  ">> +       int src_cnt, size_t len, enum async_tx_flags flags,\n"
  ">> +       struct dma_async_tx_descriptor *depend_tx,\n"
  ">> +       dma_async_tx_callback cb_fn, void *cb_param)\n"
  ">> +{\n"
  ">> +       int i, pos;\n"
- ">> +       uint8_t *p = NULL, *q = NULL, *src;\n"
+ ">> +       uint8_t *p =3D NULL, *q =3D NULL, *src;\n"
  ">> +\n"
  ">> +       /* set destination addresses */\n"
  ">> +       if (blocks[src_cnt])\n"
- ">> +               p = (uint8_t *)(page_address(blocks[src_cnt]) + offset);\n"
+ ">> +               p =3D (uint8_t *)(page_address(blocks[src_cnt]) + offset=\n"
+ ");\n"
  ">> +       if (blocks[src_cnt+1])\n"
- ">> +               q = (uint8_t *)(page_address(blocks[src_cnt+1]) + offset);\n"
+ ">> +               q =3D (uint8_t *)(page_address(blocks[src_cnt+1]) + offs=\n"
+ "et);\n"
  ">> +\n"
  ">> +       if (flags & ASYNC_TX_PQ_ZERO_P) {\n"
  ">> +               BUG_ON(!p);\n"
@@ -243,13 +258,14 @@
  ">> +               memset(q, 0, len);\n"
  ">> +       }\n"
  ">> +\n"
- ">> +       for (i = 0; i < src_cnt; i++) {\n"
- ">> +               src = (uint8_t *)(page_address(blocks[i]) + offset);\n"
- ">> +               for (pos = 0; pos < len; pos++) {\n"
+ ">> +       for (i =3D 0; i < src_cnt; i++) {\n"
+ ">> +               src =3D (uint8_t *)(page_address(blocks[i]) + offset);\n"
+ ">> +               for (pos =3D 0; pos < len; pos++) {\n"
  ">> +                       if (p)\n"
- ">> +                               p[pos] ^= src[pos];\n"
+ ">> +                               p[pos] ^=3D src[pos];\n"
  ">> +                       if (q)\n"
- ">> +                               q[pos] ^= raid6_gfmul[scfs[i]][src[pos]];\n"
+ ">> +                               q[pos] ^=3D raid6_gfmul[scfs[i]][src[pos=\n"
+ "]];\n"
  ">> +               }\n"
  ">> +       }\n"
  ">> +       async_tx_sync_epilog(cb_fn, cb_param);\n"
@@ -259,41 +275,45 @@
  "> contents of p and q, just regenerate from the current sources.  This\n"
  "> kills another site where ASYNC_TX_PQ_ZERO_{P,Q} is used.\n"
  "\n"
- " Well, perhaps you are right. The ASYNC_TX_PQ_ZERO_{P,Q} is set for \n"
- "the most common cases of using async_pq, i.e. the parity generating. \n"
- "The wrap-around async_gen_syndrome() function always set these flags \n"
+ " Well, perhaps you are right. The ASYNC_TX_PQ_ZERO_{P,Q} is set for=20\n"
+ "the most common cases of using async_pq, i.e. the parity generating.=20\n"
+ "The wrap-around async_gen_syndrome() function always set these flags=20\n"
  "before calling async_pq().\n"
  "\n"
  " The cases where ASYNC_TX_PQ_ZERO_{P,Q} isn't set are:\n"
  "\n"
- "(a) async_pq can't process the sources in one shot because of src_cnt > \n"
- "max_pq, so it should re-use the intermediate results (destination) as \n"
+ "(a) async_pq can't process the sources in one shot because of src_cnt >=20\n"
+ "max_pq, so it should re-use the intermediate results (destination) as=20\n"
  "the sources;\n"
  "\n"
- "(b) async_r6_dd_recov() does XOR with async_pq() assuming re-using the \n"
+ "(b) async_r6_dd_recov() does XOR with async_pq() assuming re-using the=20\n"
  "destination as the source.\n"
  "\n"
  "\n"
- " So, I would say that ASYNC_TX_PQ_ZERO_{P,Q} should definitely go \n"
- "away, if there were no significant overheads in (a) implemented \n"
+ " So, I would say that ASYNC_TX_PQ_ZERO_{P,Q} should definitely go=20\n"
+ "away, if there were no significant overheads in (a) implemented=20\n"
  "without these flags (see above).\n"
  "\n"
  ">> +\n"
  ">> +/**\n"
- ">> + * async_pq - attempt to do XOR and Galois calculations in parallel using\n"
+ ">> + * async_pq - attempt to do XOR and Galois calculations in parallel usi=\n"
+ "ng\n"
  ">> + *     a dma engine.\n"
- ">> + * @blocks: source block array from 0 to (src_cnt-1) with the p destination\n"
+ ">> + * @blocks: source block array from 0 to (src_cnt-1) with the p destina=\n"
+ "tion\n"
  ">> + *     at blocks[src_cnt] and q at blocks[src_cnt + 1]. Only one of two\n"
  ">> + *     destinations may be present (another then has to be set to NULL).\n"
  ">> + *     By default, the result of calculations is XOR-ed with the initial\n"
  ">> + *     content of the destinationa buffers. Use ASYNC_TX_PQ_ZERO_x flags\n"
  ">> + *     to avoid this.\n"
- ">> + *     NOTE: client code must assume the contents of this array are destroyed\n"
+ ">> + *     NOTE: client code must assume the contents of this array are des=\n"
+ "troyed\n"
  ">> + * @scfs: array of source coefficients used in GF-multiplication\n"
  ">> + * @offset: offset in pages to start transaction\n"
  ">> + * @src_cnt: number of source pages\n"
  ">> + * @len: length in bytes\n"
- ">> + * @flags: ASYNC_TX_PQ_ZERO_P, ASYNC_TX_PQ_ZERO_Q, ASYNC_TX_ASSUME_COHERENT,\n"
+ ">> + * @flags: ASYNC_TX_PQ_ZERO_P, ASYNC_TX_PQ_ZERO_Q, ASYNC_TX_ASSUME_COHE=\n"
+ "RENT,\n"
  ">> + *     ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, ASYNC_TX_ASYNC_ONLY\n"
  ">> + * @depend_tx: depends on the result of this transaction.\n"
  ">> + * @cb_fn: function to call when the operation completes\n"
@@ -305,52 +325,59 @@
  ">> +       struct dma_async_tx_descriptor *depend_tx,\n"
  ">> +       dma_async_tx_callback cb_fn, void *cb_param)\n"
  ">> +{\n"
- ">> +       struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_PQ,\n"
+ ">> +       struct dma_chan *chan =3D async_tx_find_channel(depend_tx, DMA_P=\n"
+ "Q,\n"
  ">> +                                       &blocks[src_cnt], 2,\n"
  ">> +                                       blocks, src_cnt, len);\n"
- ">> +       struct dma_device *device = chan ? chan->device : NULL;\n"
- ">> +       struct dma_async_tx_descriptor *tx = NULL;\n"
+ ">> +       struct dma_device *device =3D chan ? chan->device : NULL;\n"
+ ">> +       struct dma_async_tx_descriptor *tx =3D NULL;\n"
  ">> +\n"
  ">> +       if (!device && (flags & ASYNC_TX_ASYNC_ONLY))\n"
  ">> +               return NULL;\n"
  ">> +\n"
  ">> +       if (device) {\n"
  ">> +               /* run pq asynchronously */\n"
- ">> +               tx = do_async_pq(chan, blocks, scfs, offset, src_cnt,\n"
+ ">> +               tx =3D do_async_pq(chan, blocks, scfs, offset, src_cnt,\n"
  ">> +                       len, flags, depend_tx, cb_fn,cb_param);\n"
  ">> +       } else {\n"
  ">> +               /* run pq synchronously */\n"
  ">> +               if (!blocks[src_cnt+1]) {\n"
- ">> +                       struct page *pdst = blocks[src_cnt];\n"
+ ">> +                       struct page *pdst =3D blocks[src_cnt];\n"
  ">> +                       int i;\n"
  ">> +\n"
  ">> +                       /* Calculate P-parity only.\n"
  ">> +                        * As opposite to async_xor(), async_pq() assumes\n"
- ">> +                        * that destinations are included into calculations,\n"
+ ">> +                        * that destinations are included into calculati=\n"
+ "ons,\n"
  ">> +                        * so we should re-arrange the xor src list to\n"
  ">> +                        * achieve the similar behavior.\n"
  ">> +                        */\n"
  ">> +                       if (!(flags & ASYNC_TX_PQ_ZERO_P)) {\n"
- ">> +                               /* If async_pq() user doesn't set ZERO flag,\n"
+ ">> +                               /* If async_pq() user doesn't set ZERO f=\n"
+ "lag,\n"
  ">> +                                * it's assumed that destination has some\n"
- ">> +                                * reasonable data to include in calculations.\n"
- ">> +                                * The destination must be at position 0, so\n"
+ ">> +                                * reasonable data to include in calcula=\n"
+ "tions.\n"
+ ">> +                                * The destination must be at position 0=\n"
+ ", so\n"
  ">> +                                * shift the sources and put pdst at the\n"
  ">> +                                * beginning of the list.\n"
  ">> +                                */\n"
- ">> +                               for (i = src_cnt - 1; i >= 0; i--)\n"
- ">> +                                       blocks[i+1] = blocks[i];\n"
- ">> +                               blocks[0] = pdst;\n"
+ ">> +                               for (i =3D src_cnt - 1; i >=3D 0; i--)\n"
+ ">> +                                       blocks[i+1] =3D blocks[i];\n"
+ ">> +                               blocks[0] =3D pdst;\n"
  ">> +                               src_cnt++;\n"
- ">> +                               flags |= ASYNC_TX_XOR_DROP_DST;\n"
+ ">> +                               flags |=3D ASYNC_TX_XOR_DROP_DST;\n"
  ">> +                       } else {\n"
- ">> +                               /* If async_pq() user want to clear P, then\n"
- ">> +                                * this will be done automatically in async\n"
+ ">> +                               /* If async_pq() user want to clear P, t=\n"
+ "hen\n"
+ ">> +                                * this will be done automatically in as=\n"
+ "ync\n"
  ">> +                                * case, and with the help of ZERO_DST in\n"
  ">> +                                * the sync one.\n"
  ">> +                                */\n"
- ">> +                               flags &= ~ASYNC_TX_PQ_ZERO_P;\n"
- ">> +                               flags |= ASYNC_TX_XOR_ZERO_DST;\n"
+ ">> +                               flags &=3D ~ASYNC_TX_PQ_ZERO_P;\n"
+ ">> +                               flags |=3D ASYNC_TX_XOR_ZERO_DST;\n"
  ">> +                       }\n"
  ">> +\n"
  ">> +                       return async_xor(pdst, blocks, offset,\n"
@@ -369,8 +396,10 @@
  "\n"
  ">> @@ -81,14 +81,28 @@ enum dma_transaction_type {\n"
  ">>  *     dependency chains\n"
- ">>  * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)\n"
- ">>  * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)\n"
+ ">>  * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source b=\n"
+ "uffer(s)\n"
+ ">>  * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destina=\n"
+ "tion(s)\n"
  ">> + * @DMA_PREP_HAVE_P - set if the destination list includes the correct\n"
  ">> + *     address of P (P-parity should be handled)\n"
  ">> + * @DMA_PREP_HAVE_Q - set if the destination list includes the correct\n"
@@ -379,15 +408,15 @@
  ">> + * @DMA_PREP_ZERO_Q - set if Q has to be zeroed before proceeding\n"
  ">>  */\n"
  ">>  enum dma_ctrl_flags {\n"
- ">>        DMA_PREP_INTERRUPT = (1 << 0),\n"
- ">>        DMA_CTRL_ACK = (1 << 1),\n"
- ">>        DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2),\n"
- ">>        DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),\n"
+ ">>        DMA_PREP_INTERRUPT =3D (1 << 0),\n"
+ ">>        DMA_CTRL_ACK =3D (1 << 1),\n"
+ ">>        DMA_COMPL_SKIP_SRC_UNMAP =3D (1 << 2),\n"
+ ">>        DMA_COMPL_SKIP_DEST_UNMAP =3D (1 << 3),\n"
  ">> +\n"
- ">> +       DMA_PREP_HAVE_P = (1 << 4),\n"
- ">> +       DMA_PREP_HAVE_Q = (1 << 5),\n"
- ">> +       DMA_PREP_ZERO_P = (1 << 6),\n"
- ">> +       DMA_PREP_ZERO_Q = (1 << 7),\n"
+ ">> +       DMA_PREP_HAVE_P =3D (1 << 4),\n"
+ ">> +       DMA_PREP_HAVE_Q =3D (1 << 5),\n"
+ ">> +       DMA_PREP_ZERO_P =3D (1 << 6),\n"
+ ">> +       DMA_PREP_ZERO_Q =3D (1 << 7),\n"
  ">>  };\n"
  ">>\n"
  ">> +#define DMA_PCHECK_FAILED      (1 << 0)\n"
@@ -404,4 +433,4 @@
  " Yuri Tikhonov, Senior Software Engineer\n"
   Emcraft Systems, www.emcraft.com
 
-3bc740d2b99bd8721cfc2c1bb4cd054b7047823538a26b2ac1621968b39830f6
+01f92c8ef0789e915350bb1da606d02d6a11d2dd23ce74b92086b1e283f3978a

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.