diff for duplicates of <1328000796.20090116144156@emcraft.com> diff --git a/a/1.txt b/N1/1.txt index b86cd86..8f2fbc9 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -1,6 +1,4 @@ - - - Hello Dan, +=0D=0A Hello Dan, Thanks for review. Some comments below. @@ -12,81 +10,90 @@ On Thursday, January 15, 2009 you wrote: >> + * do_async_pq - asynchronously calculate P and/or Q >> + */ >> +static struct dma_async_tx_descriptor * ->> +do_async_pq(struct dma_chan *chan, struct page **blocks, unsigned char *scfs, ->> + unsigned int offset, int src_cnt, size_t len, enum async_tx_flags flags, +>> +do_async_pq(struct dma_chan *chan, struct page **blocks, unsigned char = +*scfs, +>> + unsigned int offset, int src_cnt, size_t len, enum async_tx_flag= +s flags, >> + struct dma_async_tx_descriptor *depend_tx, >> + dma_async_tx_callback cb_fn, void *cb_param) >> +{ ->> + struct dma_device *dma = chan->device; +>> + struct dma_device *dma =3D chan->device; >> + dma_addr_t dma_dest[2], dma_src[src_cnt]; ->> + struct dma_async_tx_descriptor *tx = NULL; +>> + struct dma_async_tx_descriptor *tx =3D NULL; >> + dma_async_tx_callback _cb_fn; >> + void *_cb_param; ->> + unsigned char *scf = NULL; ->> + int i, src_off = 0; +>> + unsigned char *scf =3D NULL; +>> + int i, src_off =3D 0; >> + unsigned short pq_src_cnt; >> + enum async_tx_flags async_flags; ->> + enum dma_ctrl_flags dma_flags = 0; +>> + enum dma_ctrl_flags dma_flags =3D 0; >> + >> + /* If we won't handle src_cnt in one shot, then the following >> + * flag(s) will be set only on the first pass of prep_dma >> + */ >> + if (flags & ASYNC_TX_PQ_ZERO_P) ->> + dma_flags |= DMA_PREP_ZERO_P; +>> + dma_flags |=3D DMA_PREP_ZERO_P; >> + if (flags & ASYNC_TX_PQ_ZERO_Q) ->> + dma_flags |= DMA_PREP_ZERO_Q; +>> + dma_flags |=3D DMA_PREP_ZERO_Q; >> + ->> + /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */ +>> + /* DMAs use destinations as sources, so use BIDIRECTIONAL mappin= +g */ >> + if (blocks[src_cnt]) { ->> + dma_dest[0] = dma_map_page(dma->dev, blocks[src_cnt], ->> + offset, len, DMA_BIDIRECTIONAL); ->> + dma_flags |= DMA_PREP_HAVE_P; +>> + dma_dest[0] =3D dma_map_page(dma->dev, blocks[src_cnt], +>> + offset, len, DMA_BIDIRECTIONA= +L); +>> + dma_flags |=3D DMA_PREP_HAVE_P; >> + } >> + if (blocks[src_cnt+1]) { ->> + dma_dest[1] = dma_map_page(dma->dev, blocks[src_cnt+1], ->> + offset, len, DMA_BIDIRECTIONAL); ->> + dma_flags |= DMA_PREP_HAVE_Q; +>> + dma_dest[1] =3D dma_map_page(dma->dev, blocks[src_cnt+1], +>> + offset, len, DMA_BIDIRECTIONA= +L); +>> + dma_flags |=3D DMA_PREP_HAVE_Q; >> + } >> + ->> + for (i = 0; i < src_cnt; i++) ->> + dma_src[i] = dma_map_page(dma->dev, blocks[i], +>> + for (i =3D 0; i < src_cnt; i++) +>> + dma_src[i] =3D dma_map_page(dma->dev, blocks[i], >> + offset, len, DMA_TO_DEVICE); >> + >> + while (src_cnt) { ->> + async_flags = flags; ->> + pq_src_cnt = min(src_cnt, (int)dma->max_pq); ->> + /* if we are submitting additional pqs, leave the chain open, ->> + * clear the callback parameters, and leave the destination +>> + async_flags =3D flags; +>> + pq_src_cnt =3D min(src_cnt, (int)dma->max_pq); +>> + /* if we are submitting additional pqs, leave the chain = +open, +>> + * clear the callback parameters, and leave the destinat= +ion >> + * buffers mapped >> + */ >> + if (src_cnt > pq_src_cnt) { ->> + async_flags &= ~ASYNC_TX_ACK; ->> + dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP; ->> + _cb_fn = NULL; ->> + _cb_param = NULL; +>> + async_flags &=3D ~ASYNC_TX_ACK; +>> + dma_flags |=3D DMA_COMPL_SKIP_DEST_UNMAP; +>> + _cb_fn =3D NULL; +>> + _cb_param =3D NULL; >> + } else { ->> + _cb_fn = cb_fn; ->> + _cb_param = cb_param; +>> + _cb_fn =3D cb_fn; +>> + _cb_param =3D cb_param; >> + } >> + if (_cb_fn) ->> + dma_flags |= DMA_PREP_INTERRUPT; +>> + dma_flags |=3D DMA_PREP_INTERRUPT; >> + if (scfs) ->> + scf = &scfs[src_off]; +>> + scf =3D &scfs[src_off]; >> + >> + /* Since we have clobbered the src_list we are committed >> + * to doing this asynchronously. Drivers force forward >> + * progress in case they can not provide a descriptor >> + */ ->> + tx = dma->device_prep_dma_pq(chan, dma_dest, ->> + &dma_src[src_off], pq_src_cnt, +>> + tx =3D dma->device_prep_dma_pq(chan, dma_dest, +>> + &dma_src[src_off], pq_src_c= +nt, >> + scf, len, dma_flags); >> + if (unlikely(!tx)) >> + async_tx_quiesce(&depend_tx); >> + ->> + /* spin wait for the preceeding transactions to complete */ +>> + /* spin wait for the preceeding transactions to complete= + */ >> + while (unlikely(!tx)) { >> + dma_async_issue_pending(chan); ->> + tx = dma->device_prep_dma_pq(chan, dma_dest, +>> + tx =3D dma->device_prep_dma_pq(chan, dma_dest, >> + &dma_src[src_off], pq_src_cnt, >> + scf, len, dma_flags); >> + } @@ -94,20 +101,21 @@ On Thursday, January 15, 2009 you wrote: >> + async_tx_submit(chan, tx, async_flags, depend_tx, >> + _cb_fn, _cb_param); >> + ->> + depend_tx = tx; ->> + flags |= ASYNC_TX_DEP_ACK; +>> + depend_tx =3D tx; +>> + flags |=3D ASYNC_TX_DEP_ACK; >> + >> + if (src_cnt > pq_src_cnt) { >> + /* drop completed sources */ ->> + src_cnt -= pq_src_cnt; ->> + src_off += pq_src_cnt; +>> + src_cnt -=3D pq_src_cnt; +>> + src_off +=3D pq_src_cnt; >> + >> + /* use the intermediate result as a source; we >> + * clear DMA_PREP_ZERO, so prep_dma_pq will >> + * include destination(s) into calculations. Thus >> + * keep DMA_PREP_HAVE_x in dma_flags only >> + */ ->> + dma_flags &= (DMA_PREP_HAVE_P | DMA_PREP_HAVE_Q); +>> + dma_flags &=3D (DMA_PREP_HAVE_P | DMA_PREP_HAVE_= +Q); > I don't think this will work as we will be mixing Q into the new P and > P into the new Q. In order to support (src_cnt > device->max_pq) we @@ -115,84 +123,88 @@ On Thursday, January 15, 2009 you wrote: > continued (DMA_PREP_CONTINUE) and to apply different coeffeicients to > P and Q to cancel the effect of including them as sources. - With DMA_PREP_ZERO_P/Q approach, the Q isn't mixed into new P, and P -isn't mixed into new Q. For your example of max_pq=4: + With DMA_PREP_ZERO_P/Q approach, the Q isn't mixed into new P, and P=20 +isn't mixed into new Q. For your example of max_pq=3D4: - p, q = PQ(src0, src1, src2, src3, src4, COEF({01}, {02}, {04}, {08}, {10})) + p, q =3D PQ(src0, src1, src2, src3, src4, COEF({01}, {02}, {04}, {08}, {10= +})) with the current implementation will be split into: - p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08}) - p`,q` = PQ(src4, COEF({10})) + p, q =3D PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08}) + p`,q` =3D PQ(src4, COEF({10})) which will result to the following: - p = ((dma_flags & DMA_PREP_ZERO_P) ? 0 : old_p) + src0 + src1 + src2 + src3 - q = ((dma_flags & DMA_PREP_ZERO_Q) ? 0 : old_q) + {01}*src0 + {02}*src1 + {04}*src2 + {08}*src3 - - p` = p + src4 - q` = q + {10}*src4 + p =3D ((dma_flags & DMA_PREP_ZERO_P) ? 0 : old_p) + src0 + src1 + src2 + s= +rc3 + q =3D ((dma_flags & DMA_PREP_ZERO_Q) ? 0 : old_q) + {01}*src0 + {02}*src1 = ++ {04}*src2 + {08}*src3 +=20 + p` =3D p + src4 + q` =3D q + {10}*src4 - But, if we get rid of DMA_PREP_ZERO_P/Q, then the mess with P/Q will + But, if we get rid of DMA_PREP_ZERO_P/Q, then the mess with P/Q will=20 have a place indeed. > Here is an -> example of supporting a 5 source pq operation where max_pq == 4 (the +> example of supporting a 5 source pq operation where max_pq =3D=3D 4 (the > minimum). -> p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08})) -> p', q' = PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10})) +> p, q =3D PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08})) +> p', q' =3D PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10})) -> p' = p + q + q + src4 = p + src4 = P -> q' = {00}*p + {01}*q + {00}*q + {10}*src4 = q + {10)*src4 = Q +> p' =3D p + q + q + src4 =3D p + src4 =3D P +> q' =3D {00}*p + {01}*q + {00}*q + {10}*src4 =3D q + {10)*src4 =3D Q > ...at no point do we need to zero P or Q. Yes, this requires a lot of > extra work for incremental sources, - I would say, that 'very very lot'. In general this means that for + I would say, that 'very very lot'. In general this means that for=20 the cases of N sources > max_pq we'll have to do: - C = 1 + ceil((N-max_pq)/(max_pq - 3)) number of calls to ADMA. + C =3D 1 + ceil((N-max_pq)/(max_pq - 3)) number of calls to ADMA. - E.g., for max_pq = 4: + E.g., for max_pq =3D 4: - N = 5 => C = 2, - N = 6 => C = 3, + N =3D 5 =3D> C =3D 2, + N =3D 6 =3D> C =3D 3, .. - N = 15 => C = 12, - N = 16 => C = 13, + N =3D 15 =3D> C =3D 12, + N =3D 16 =3D> C =3D 13, .. - N = 128 => C = 125. + N =3D 128 =3D> C =3D 125. If we stay with the current approach of using DMA_PREP_ZERO_P/Q, then - C = 1 + ceil((N-max_pq)/max_pq)) number of calls to ADMA. + C =3D 1 + ceil((N-max_pq)/max_pq)) number of calls to ADMA. And the same series will result to: - N = 5 => C = 2, - N = 6 => C = 2, + N =3D 5 =3D> C =3D 2, + N =3D 6 =3D> C =3D 2, .. - N = 15 => C = 4, - N = 16 => C = 4, + N =3D 15 =3D> C =3D 4, + N =3D 16 =3D> C =3D 4, .. - N = 128 => C = 32. + N =3D 128 =3D> C =3D 32. - I'm afraid that the difference (13/4, 125/32) is very significant, so -getting rid of DMA_PREP_ZERO_P/Q will eat most of the improvement + I'm afraid that the difference (13/4, 125/32) is very significant, so=20 +getting rid of DMA_PREP_ZERO_P/Q will eat most of the improvement=20 which could be achieved with the current approach. -> but at this point I do not see a cleaner alternatve for engines like iop13xx. +> but at this point I do not see a cleaner alternatve for engines like iop= +13xx. - I can't find any description of iop13xx processors at Intel's + I can't find any description of iop13xx processors at Intel's=20 web-site, only 3xx: -http://www.intel.com/design/iio/index.htm?iid=ipp_embed+embed_io +http://www.intel.com/design/iio/index.htm?iid=3Dipp_embed+embed_io - So, it's hard for me to do any suggestions. I just wonder - doesn't -iop13xx allow users to program destination addresses into the sources + So, it's hard for me to do any suggestions. I just wonder - doesn't=20 +iop13xx allow users to program destination addresses into the sources=20 fields of descriptors? >> + } else @@ -206,19 +218,22 @@ fields of descriptors? >> + * do_sync_pq - synchronously calculate P and Q >> + */ >> +static void ->> +do_sync_pq(struct page **blocks, unsigned char *scfs, unsigned int offset, +>> +do_sync_pq(struct page **blocks, unsigned char *scfs, unsigned int offs= +et, >> + int src_cnt, size_t len, enum async_tx_flags flags, >> + struct dma_async_tx_descriptor *depend_tx, >> + dma_async_tx_callback cb_fn, void *cb_param) >> +{ >> + int i, pos; ->> + uint8_t *p = NULL, *q = NULL, *src; +>> + uint8_t *p =3D NULL, *q =3D NULL, *src; >> + >> + /* set destination addresses */ >> + if (blocks[src_cnt]) ->> + p = (uint8_t *)(page_address(blocks[src_cnt]) + offset); +>> + p =3D (uint8_t *)(page_address(blocks[src_cnt]) + offset= +); >> + if (blocks[src_cnt+1]) ->> + q = (uint8_t *)(page_address(blocks[src_cnt+1]) + offset); +>> + q =3D (uint8_t *)(page_address(blocks[src_cnt+1]) + offs= +et); >> + >> + if (flags & ASYNC_TX_PQ_ZERO_P) { >> + BUG_ON(!p); @@ -230,13 +245,14 @@ fields of descriptors? >> + memset(q, 0, len); >> + } >> + ->> + for (i = 0; i < src_cnt; i++) { ->> + src = (uint8_t *)(page_address(blocks[i]) + offset); ->> + for (pos = 0; pos < len; pos++) { +>> + for (i =3D 0; i < src_cnt; i++) { +>> + src =3D (uint8_t *)(page_address(blocks[i]) + offset); +>> + for (pos =3D 0; pos < len; pos++) { >> + if (p) ->> + p[pos] ^= src[pos]; +>> + p[pos] ^=3D src[pos]; >> + if (q) ->> + q[pos] ^= raid6_gfmul[scfs[i]][src[pos]]; +>> + q[pos] ^=3D raid6_gfmul[scfs[i]][src[pos= +]]; >> + } >> + } >> + async_tx_sync_epilog(cb_fn, cb_param); @@ -246,41 +262,45 @@ fields of descriptors? > contents of p and q, just regenerate from the current sources. This > kills another site where ASYNC_TX_PQ_ZERO_{P,Q} is used. - Well, perhaps you are right. The ASYNC_TX_PQ_ZERO_{P,Q} is set for -the most common cases of using async_pq, i.e. the parity generating. -The wrap-around async_gen_syndrome() function always set these flags + Well, perhaps you are right. The ASYNC_TX_PQ_ZERO_{P,Q} is set for=20 +the most common cases of using async_pq, i.e. the parity generating.=20 +The wrap-around async_gen_syndrome() function always set these flags=20 before calling async_pq(). The cases where ASYNC_TX_PQ_ZERO_{P,Q} isn't set are: -(a) async_pq can't process the sources in one short because of src_cnt > -max_pq, so it should re-use the intermediate results (destination) as +(a) async_pq can't process the sources in one short because of src_cnt >=20 +max_pq, so it should re-use the intermediate results (destination) as=20 the sources; -(b) async_r6_dd_recov() does XOR with async_pq() assuming re-using the +(b) async_r6_dd_recov() does XOR with async_pq() assuming re-using the=20 destination as the source. - So, I would say that ASYNC_TX_PQ_ZERO_{P,Q} should definitely go -away, if there were no significant overheads in (a) implemented + So, I would say that ASYNC_TX_PQ_ZERO_{P,Q} should definitely go=20 +away, if there were no significant overheads in (a) implemented=20 without these flags (see above). >> + >> +/** ->> + * async_pq - attempt to do XOR and Galois calculations in parallel using +>> + * async_pq - attempt to do XOR and Galois calculations in parallel usi= +ng >> + * a dma engine. ->> + * @blocks: source block array from 0 to (src_cnt-1) with the p destination +>> + * @blocks: source block array from 0 to (src_cnt-1) with the p destina= +tion >> + * at blocks[src_cnt] and q at blocks[src_cnt + 1]. Only one of two >> + * destinations may be present (another then has to be set to NULL). >> + * By default, the result of calculations is XOR-ed with the initial >> + * content of the destinationa buffers. Use ASYNC_TX_PQ_ZERO_x flags >> + * to avoid this. ->> + * NOTE: client code must assume the contents of this array are destroyed +>> + * NOTE: client code must assume the contents of this array are des= +troyed >> + * @scfs: array of source coefficients used in GF-multiplication >> + * @offset: offset in pages to start transaction >> + * @src_cnt: number of source pages >> + * @len: length in bytes ->> + * @flags: ASYNC_TX_PQ_ZERO_P, ASYNC_TX_PQ_ZERO_Q, ASYNC_TX_ASSUME_COHERENT, +>> + * @flags: ASYNC_TX_PQ_ZERO_P, ASYNC_TX_PQ_ZERO_Q, ASYNC_TX_ASSUME_COHE= +RENT, >> + * ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, ASYNC_TX_ASYNC_ONLY >> + * @depend_tx: depends on the result of this transaction. >> + * @cb_fn: function to call when the operation completes @@ -292,52 +312,59 @@ without these flags (see above). >> + struct dma_async_tx_descriptor *depend_tx, >> + dma_async_tx_callback cb_fn, void *cb_param) >> +{ ->> + struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_PQ, +>> + struct dma_chan *chan =3D async_tx_find_channel(depend_tx, DMA_P= +Q, >> + &blocks[src_cnt], 2, >> + blocks, src_cnt, len); ->> + struct dma_device *device = chan ? chan->device : NULL; ->> + struct dma_async_tx_descriptor *tx = NULL; +>> + struct dma_device *device =3D chan ? chan->device : NULL; +>> + struct dma_async_tx_descriptor *tx =3D NULL; >> + >> + if (!device && (flags & ASYNC_TX_ASYNC_ONLY)) >> + return NULL; >> + >> + if (device) { >> + /* run pq asynchronously */ ->> + tx = do_async_pq(chan, blocks, scfs, offset, src_cnt, +>> + tx =3D do_async_pq(chan, blocks, scfs, offset, src_cnt, >> + len, flags, depend_tx, cb_fn,cb_param); >> + } else { >> + /* run pq synchronously */ >> + if (!blocks[src_cnt+1]) { ->> + struct page *pdst = blocks[src_cnt]; +>> + struct page *pdst =3D blocks[src_cnt]; >> + int i; >> + >> + /* Calculate P-parity only. >> + * As opposite to async_xor(), async_pq() assumes ->> + * that destinations are included into calculations, +>> + * that destinations are included into calculati= +ons, >> + * so we should re-arrange the xor src list to >> + * achieve the similar behavior. >> + */ >> + if (!(flags & ASYNC_TX_PQ_ZERO_P)) { ->> + /* If async_pq() user doesn't set ZERO flag, +>> + /* If async_pq() user doesn't set ZERO f= +lag, >> + * it's assumed that destination has some ->> + * reasonable data to include in calculations. ->> + * The destination must be at position 0, so +>> + * reasonable data to include in calcula= +tions. +>> + * The destination must be at position 0= +, so >> + * shift the sources and put pdst at the >> + * beginning of the list. >> + */ ->> + for (i = src_cnt - 1; i >= 0; i--) ->> + blocks[i+1] = blocks[i]; ->> + blocks[0] = pdst; +>> + for (i =3D src_cnt - 1; i >=3D 0; i--) +>> + blocks[i+1] =3D blocks[i]; +>> + blocks[0] =3D pdst; >> + src_cnt++; ->> + flags |= ASYNC_TX_XOR_DROP_DST; +>> + flags |=3D ASYNC_TX_XOR_DROP_DST; >> + } else { ->> + /* If async_pq() user want to clear P, then ->> + * this will be done automatically in async +>> + /* If async_pq() user want to clear P, t= +hen +>> + * this will be done automatically in as= +ync >> + * case, and with the help of ZERO_DST in >> + * the sync one. >> + */ ->> + flags &= ~ASYNC_TX_PQ_ZERO_P; ->> + flags |= ASYNC_TX_XOR_ZERO_DST; +>> + flags &=3D ~ASYNC_TX_PQ_ZERO_P; +>> + flags |=3D ASYNC_TX_XOR_ZERO_DST; >> + } >> + >> + return async_xor(pdst, blocks, offset, @@ -356,8 +383,10 @@ without these flags (see above). >> @@ -81,14 +81,28 @@ enum dma_transaction_type { >> * dependency chains ->> * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) ->> * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) +>> * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source b= +uffer(s) +>> * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destina= +tion(s) >> + * @DMA_PREP_HAVE_P - set if the destination list includes the correct >> + * address of P (P-parity should be handled) >> + * @DMA_PREP_HAVE_Q - set if the destination list includes the correct @@ -366,15 +395,15 @@ without these flags (see above). >> + * @DMA_PREP_ZERO_Q - set if Q has to be zeroed before proceeding >> */ >> enum dma_ctrl_flags { ->> DMA_PREP_INTERRUPT = (1 << 0), ->> DMA_CTRL_ACK = (1 << 1), ->> DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2), ->> DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), +>> DMA_PREP_INTERRUPT =3D (1 << 0), +>> DMA_CTRL_ACK =3D (1 << 1), +>> DMA_COMPL_SKIP_SRC_UNMAP =3D (1 << 2), +>> DMA_COMPL_SKIP_DEST_UNMAP =3D (1 << 3), >> + ->> + DMA_PREP_HAVE_P = (1 << 4), ->> + DMA_PREP_HAVE_Q = (1 << 5), ->> + DMA_PREP_ZERO_P = (1 << 6), ->> + DMA_PREP_ZERO_Q = (1 << 7), +>> + DMA_PREP_HAVE_P =3D (1 << 4), +>> + DMA_PREP_HAVE_Q =3D (1 << 5), +>> + DMA_PREP_ZERO_P =3D (1 << 6), +>> + DMA_PREP_ZERO_Q =3D (1 << 7), >> }; >> >> +#define DMA_PCHECK_FAILED (1 << 0) diff --git a/a/content_digest b/N1/content_digest index 47176eb..51ff5b4 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -6,14 +6,12 @@ "To\0Dan Williams <dan.j.williams@intel.com>\0" "Cc\0linux-raid@vger.kernel.org" linuxppc-dev@ozlabs.org - dzu@denx.de wd@denx.de + dzu@denx.de " yanok@emcraft.com\0" "\00:1\0" "b\0" - "\n" - "\n" - " Hello Dan,\n" + "=0D=0A Hello Dan,\n" "\n" " Thanks for review. Some comments below.\n" "\n" @@ -25,81 +23,90 @@ ">> + * do_async_pq - asynchronously calculate P and/or Q\n" ">> + */\n" ">> +static struct dma_async_tx_descriptor *\n" - ">> +do_async_pq(struct dma_chan *chan, struct page **blocks, unsigned char *scfs,\n" - ">> + unsigned int offset, int src_cnt, size_t len, enum async_tx_flags flags,\n" + ">> +do_async_pq(struct dma_chan *chan, struct page **blocks, unsigned char =\n" + "*scfs,\n" + ">> + unsigned int offset, int src_cnt, size_t len, enum async_tx_flag=\n" + "s flags,\n" ">> + struct dma_async_tx_descriptor *depend_tx,\n" ">> + dma_async_tx_callback cb_fn, void *cb_param)\n" ">> +{\n" - ">> + struct dma_device *dma = chan->device;\n" + ">> + struct dma_device *dma =3D chan->device;\n" ">> + dma_addr_t dma_dest[2], dma_src[src_cnt];\n" - ">> + struct dma_async_tx_descriptor *tx = NULL;\n" + ">> + struct dma_async_tx_descriptor *tx =3D NULL;\n" ">> + dma_async_tx_callback _cb_fn;\n" ">> + void *_cb_param;\n" - ">> + unsigned char *scf = NULL;\n" - ">> + int i, src_off = 0;\n" + ">> + unsigned char *scf =3D NULL;\n" + ">> + int i, src_off =3D 0;\n" ">> + unsigned short pq_src_cnt;\n" ">> + enum async_tx_flags async_flags;\n" - ">> + enum dma_ctrl_flags dma_flags = 0;\n" + ">> + enum dma_ctrl_flags dma_flags =3D 0;\n" ">> +\n" ">> + /* If we won't handle src_cnt in one shot, then the following\n" ">> + * flag(s) will be set only on the first pass of prep_dma\n" ">> + */\n" ">> + if (flags & ASYNC_TX_PQ_ZERO_P)\n" - ">> + dma_flags |= DMA_PREP_ZERO_P;\n" + ">> + dma_flags |=3D DMA_PREP_ZERO_P;\n" ">> + if (flags & ASYNC_TX_PQ_ZERO_Q)\n" - ">> + dma_flags |= DMA_PREP_ZERO_Q;\n" + ">> + dma_flags |=3D DMA_PREP_ZERO_Q;\n" ">> +\n" - ">> + /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */\n" + ">> + /* DMAs use destinations as sources, so use BIDIRECTIONAL mappin=\n" + "g */\n" ">> + if (blocks[src_cnt]) {\n" - ">> + dma_dest[0] = dma_map_page(dma->dev, blocks[src_cnt],\n" - ">> + offset, len, DMA_BIDIRECTIONAL);\n" - ">> + dma_flags |= DMA_PREP_HAVE_P;\n" + ">> + dma_dest[0] =3D dma_map_page(dma->dev, blocks[src_cnt],\n" + ">> + offset, len, DMA_BIDIRECTIONA=\n" + "L);\n" + ">> + dma_flags |=3D DMA_PREP_HAVE_P;\n" ">> + }\n" ">> + if (blocks[src_cnt+1]) {\n" - ">> + dma_dest[1] = dma_map_page(dma->dev, blocks[src_cnt+1],\n" - ">> + offset, len, DMA_BIDIRECTIONAL);\n" - ">> + dma_flags |= DMA_PREP_HAVE_Q;\n" + ">> + dma_dest[1] =3D dma_map_page(dma->dev, blocks[src_cnt+1],\n" + ">> + offset, len, DMA_BIDIRECTIONA=\n" + "L);\n" + ">> + dma_flags |=3D DMA_PREP_HAVE_Q;\n" ">> + }\n" ">> +\n" - ">> + for (i = 0; i < src_cnt; i++)\n" - ">> + dma_src[i] = dma_map_page(dma->dev, blocks[i],\n" + ">> + for (i =3D 0; i < src_cnt; i++)\n" + ">> + dma_src[i] =3D dma_map_page(dma->dev, blocks[i],\n" ">> + offset, len, DMA_TO_DEVICE);\n" ">> +\n" ">> + while (src_cnt) {\n" - ">> + async_flags = flags;\n" - ">> + pq_src_cnt = min(src_cnt, (int)dma->max_pq);\n" - ">> + /* if we are submitting additional pqs, leave the chain open,\n" - ">> + * clear the callback parameters, and leave the destination\n" + ">> + async_flags =3D flags;\n" + ">> + pq_src_cnt =3D min(src_cnt, (int)dma->max_pq);\n" + ">> + /* if we are submitting additional pqs, leave the chain =\n" + "open,\n" + ">> + * clear the callback parameters, and leave the destinat=\n" + "ion\n" ">> + * buffers mapped\n" ">> + */\n" ">> + if (src_cnt > pq_src_cnt) {\n" - ">> + async_flags &= ~ASYNC_TX_ACK;\n" - ">> + dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;\n" - ">> + _cb_fn = NULL;\n" - ">> + _cb_param = NULL;\n" + ">> + async_flags &=3D ~ASYNC_TX_ACK;\n" + ">> + dma_flags |=3D DMA_COMPL_SKIP_DEST_UNMAP;\n" + ">> + _cb_fn =3D NULL;\n" + ">> + _cb_param =3D NULL;\n" ">> + } else {\n" - ">> + _cb_fn = cb_fn;\n" - ">> + _cb_param = cb_param;\n" + ">> + _cb_fn =3D cb_fn;\n" + ">> + _cb_param =3D cb_param;\n" ">> + }\n" ">> + if (_cb_fn)\n" - ">> + dma_flags |= DMA_PREP_INTERRUPT;\n" + ">> + dma_flags |=3D DMA_PREP_INTERRUPT;\n" ">> + if (scfs)\n" - ">> + scf = &scfs[src_off];\n" + ">> + scf =3D &scfs[src_off];\n" ">> +\n" ">> + /* Since we have clobbered the src_list we are committed\n" ">> + * to doing this asynchronously. Drivers force forward\n" ">> + * progress in case they can not provide a descriptor\n" ">> + */\n" - ">> + tx = dma->device_prep_dma_pq(chan, dma_dest,\n" - ">> + &dma_src[src_off], pq_src_cnt,\n" + ">> + tx =3D dma->device_prep_dma_pq(chan, dma_dest,\n" + ">> + &dma_src[src_off], pq_src_c=\n" + "nt,\n" ">> + scf, len, dma_flags);\n" ">> + if (unlikely(!tx))\n" ">> + async_tx_quiesce(&depend_tx);\n" ">> +\n" - ">> + /* spin wait for the preceeding transactions to complete */\n" + ">> + /* spin wait for the preceeding transactions to complete=\n" + " */\n" ">> + while (unlikely(!tx)) {\n" ">> + dma_async_issue_pending(chan);\n" - ">> + tx = dma->device_prep_dma_pq(chan, dma_dest,\n" + ">> + tx =3D dma->device_prep_dma_pq(chan, dma_dest,\n" ">> + &dma_src[src_off], pq_src_cnt,\n" ">> + scf, len, dma_flags);\n" ">> + }\n" @@ -107,20 +114,21 @@ ">> + async_tx_submit(chan, tx, async_flags, depend_tx,\n" ">> + _cb_fn, _cb_param);\n" ">> +\n" - ">> + depend_tx = tx;\n" - ">> + flags |= ASYNC_TX_DEP_ACK;\n" + ">> + depend_tx =3D tx;\n" + ">> + flags |=3D ASYNC_TX_DEP_ACK;\n" ">> +\n" ">> + if (src_cnt > pq_src_cnt) {\n" ">> + /* drop completed sources */\n" - ">> + src_cnt -= pq_src_cnt;\n" - ">> + src_off += pq_src_cnt;\n" + ">> + src_cnt -=3D pq_src_cnt;\n" + ">> + src_off +=3D pq_src_cnt;\n" ">> +\n" ">> + /* use the intermediate result as a source; we\n" ">> + * clear DMA_PREP_ZERO, so prep_dma_pq will\n" ">> + * include destination(s) into calculations. Thus\n" ">> + * keep DMA_PREP_HAVE_x in dma_flags only\n" ">> + */\n" - ">> + dma_flags &= (DMA_PREP_HAVE_P | DMA_PREP_HAVE_Q);\n" + ">> + dma_flags &=3D (DMA_PREP_HAVE_P | DMA_PREP_HAVE_=\n" + "Q);\n" "\n" "> I don't think this will work as we will be mixing Q into the new P and\n" "> P into the new Q. In order to support (src_cnt > device->max_pq) we\n" @@ -128,84 +136,88 @@ "> continued (DMA_PREP_CONTINUE) and to apply different coeffeicients to\n" "> P and Q to cancel the effect of including them as sources.\n" "\n" - " With DMA_PREP_ZERO_P/Q approach, the Q isn't mixed into new P, and P \n" - "isn't mixed into new Q. For your example of max_pq=4:\n" + " With DMA_PREP_ZERO_P/Q approach, the Q isn't mixed into new P, and P=20\n" + "isn't mixed into new Q. For your example of max_pq=3D4:\n" "\n" - " p, q = PQ(src0, src1, src2, src3, src4, COEF({01}, {02}, {04}, {08}, {10}))\n" + " p, q =3D PQ(src0, src1, src2, src3, src4, COEF({01}, {02}, {04}, {08}, {10=\n" + "}))\n" "\n" " with the current implementation will be split into:\n" "\n" - " p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08})\n" - " p`,q` = PQ(src4, COEF({10}))\n" + " p, q =3D PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08})\n" + " p`,q` =3D PQ(src4, COEF({10}))\n" "\n" " which will result to the following:\n" "\n" - " p = ((dma_flags & DMA_PREP_ZERO_P) ? 0 : old_p) + src0 + src1 + src2 + src3\n" - " q = ((dma_flags & DMA_PREP_ZERO_Q) ? 0 : old_q) + {01}*src0 + {02}*src1 + {04}*src2 + {08}*src3\n" - " \n" - " p` = p + src4\n" - " q` = q + {10}*src4\n" + " p =3D ((dma_flags & DMA_PREP_ZERO_P) ? 0 : old_p) + src0 + src1 + src2 + s=\n" + "rc3\n" + " q =3D ((dma_flags & DMA_PREP_ZERO_Q) ? 0 : old_q) + {01}*src0 + {02}*src1 =\n" + "+ {04}*src2 + {08}*src3\n" + "=20\n" + " p` =3D p + src4\n" + " q` =3D q + {10}*src4\n" "\n" - " But, if we get rid of DMA_PREP_ZERO_P/Q, then the mess with P/Q will \n" + " But, if we get rid of DMA_PREP_ZERO_P/Q, then the mess with P/Q will=20\n" "have a place indeed.\n" "\n" "> Here is an\n" - "> example of supporting a 5 source pq operation where max_pq == 4 (the\n" + "> example of supporting a 5 source pq operation where max_pq =3D=3D 4 (the\n" "> minimum).\n" "\n" - "> p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08}))\n" - "> p', q' = PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10}))\n" + "> p, q =3D PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08}))\n" + "> p', q' =3D PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10}))\n" "\n" - "> p' = p + q + q + src4 = p + src4 = P\n" - "> q' = {00}*p + {01}*q + {00}*q + {10}*src4 = q + {10)*src4 = Q\n" + "> p' =3D p + q + q + src4 =3D p + src4 =3D P\n" + "> q' =3D {00}*p + {01}*q + {00}*q + {10}*src4 =3D q + {10)*src4 =3D Q\n" "\n" "> ...at no point do we need to zero P or Q. Yes, this requires a lot of\n" "> extra work for incremental sources,\n" "\n" - " I would say, that 'very very lot'. In general this means that for \n" + " I would say, that 'very very lot'. In general this means that for=20\n" "the cases of N sources > max_pq we'll have to do:\n" "\n" - " C = 1 + ceil((N-max_pq)/(max_pq - 3)) number of calls to ADMA.\n" + " C =3D 1 + ceil((N-max_pq)/(max_pq - 3)) number of calls to ADMA.\n" "\n" - " E.g., for max_pq = 4:\n" + " E.g., for max_pq =3D 4:\n" "\n" - " N = 5 => C = 2,\n" - " N = 6 => C = 3,\n" + " N =3D 5 =3D> C =3D 2,\n" + " N =3D 6 =3D> C =3D 3,\n" " ..\n" - " N = 15 => C = 12,\n" - " N = 16 => C = 13,\n" + " N =3D 15 =3D> C =3D 12,\n" + " N =3D 16 =3D> C =3D 13,\n" " ..\n" - " N = 128 => C = 125.\n" + " N =3D 128 =3D> C =3D 125.\n" "\n" "\n" " If we stay with the current approach of using DMA_PREP_ZERO_P/Q, then\n" "\n" - " C = 1 + ceil((N-max_pq)/max_pq)) number of calls to ADMA.\n" + " C =3D 1 + ceil((N-max_pq)/max_pq)) number of calls to ADMA.\n" "\n" " And the same series will result to:\n" "\n" - " N = 5 => C = 2,\n" - " N = 6 => C = 2,\n" + " N =3D 5 =3D> C =3D 2,\n" + " N =3D 6 =3D> C =3D 2,\n" " ..\n" - " N = 15 => C = 4,\n" - " N = 16 => C = 4,\n" + " N =3D 15 =3D> C =3D 4,\n" + " N =3D 16 =3D> C =3D 4,\n" " ..\n" - " N = 128 => C = 32.\n" + " N =3D 128 =3D> C =3D 32.\n" "\n" "\n" - " I'm afraid that the difference (13/4, 125/32) is very significant, so \n" - "getting rid of DMA_PREP_ZERO_P/Q will eat most of the improvement \n" + " I'm afraid that the difference (13/4, 125/32) is very significant, so=20\n" + "getting rid of DMA_PREP_ZERO_P/Q will eat most of the improvement=20\n" "which could be achieved with the current approach.\n" "\n" - "> but at this point I do not see a cleaner alternatve for engines like iop13xx.\n" + "> but at this point I do not see a cleaner alternatve for engines like iop=\n" + "13xx.\n" "\n" - " I can't find any description of iop13xx processors at Intel's \n" + " I can't find any description of iop13xx processors at Intel's=20\n" "web-site, only 3xx:\n" "\n" - "http://www.intel.com/design/iio/index.htm?iid=ipp_embed+embed_io\n" + "http://www.intel.com/design/iio/index.htm?iid=3Dipp_embed+embed_io\n" "\n" - " So, it's hard for me to do any suggestions. I just wonder - doesn't \n" - "iop13xx allow users to program destination addresses into the sources \n" + " So, it's hard for me to do any suggestions. I just wonder - doesn't=20\n" + "iop13xx allow users to program destination addresses into the sources=20\n" "fields of descriptors?\n" "\n" ">> + } else\n" @@ -219,19 +231,22 @@ ">> + * do_sync_pq - synchronously calculate P and Q\n" ">> + */\n" ">> +static void\n" - ">> +do_sync_pq(struct page **blocks, unsigned char *scfs, unsigned int offset,\n" + ">> +do_sync_pq(struct page **blocks, unsigned char *scfs, unsigned int offs=\n" + "et,\n" ">> + int src_cnt, size_t len, enum async_tx_flags flags,\n" ">> + struct dma_async_tx_descriptor *depend_tx,\n" ">> + dma_async_tx_callback cb_fn, void *cb_param)\n" ">> +{\n" ">> + int i, pos;\n" - ">> + uint8_t *p = NULL, *q = NULL, *src;\n" + ">> + uint8_t *p =3D NULL, *q =3D NULL, *src;\n" ">> +\n" ">> + /* set destination addresses */\n" ">> + if (blocks[src_cnt])\n" - ">> + p = (uint8_t *)(page_address(blocks[src_cnt]) + offset);\n" + ">> + p =3D (uint8_t *)(page_address(blocks[src_cnt]) + offset=\n" + ");\n" ">> + if (blocks[src_cnt+1])\n" - ">> + q = (uint8_t *)(page_address(blocks[src_cnt+1]) + offset);\n" + ">> + q =3D (uint8_t *)(page_address(blocks[src_cnt+1]) + offs=\n" + "et);\n" ">> +\n" ">> + if (flags & ASYNC_TX_PQ_ZERO_P) {\n" ">> + BUG_ON(!p);\n" @@ -243,13 +258,14 @@ ">> + memset(q, 0, len);\n" ">> + }\n" ">> +\n" - ">> + for (i = 0; i < src_cnt; i++) {\n" - ">> + src = (uint8_t *)(page_address(blocks[i]) + offset);\n" - ">> + for (pos = 0; pos < len; pos++) {\n" + ">> + for (i =3D 0; i < src_cnt; i++) {\n" + ">> + src =3D (uint8_t *)(page_address(blocks[i]) + offset);\n" + ">> + for (pos =3D 0; pos < len; pos++) {\n" ">> + if (p)\n" - ">> + p[pos] ^= src[pos];\n" + ">> + p[pos] ^=3D src[pos];\n" ">> + if (q)\n" - ">> + q[pos] ^= raid6_gfmul[scfs[i]][src[pos]];\n" + ">> + q[pos] ^=3D raid6_gfmul[scfs[i]][src[pos=\n" + "]];\n" ">> + }\n" ">> + }\n" ">> + async_tx_sync_epilog(cb_fn, cb_param);\n" @@ -259,41 +275,45 @@ "> contents of p and q, just regenerate from the current sources. This\n" "> kills another site where ASYNC_TX_PQ_ZERO_{P,Q} is used.\n" "\n" - " Well, perhaps you are right. The ASYNC_TX_PQ_ZERO_{P,Q} is set for \n" - "the most common cases of using async_pq, i.e. the parity generating. \n" - "The wrap-around async_gen_syndrome() function always set these flags \n" + " Well, perhaps you are right. The ASYNC_TX_PQ_ZERO_{P,Q} is set for=20\n" + "the most common cases of using async_pq, i.e. the parity generating.=20\n" + "The wrap-around async_gen_syndrome() function always set these flags=20\n" "before calling async_pq().\n" "\n" " The cases where ASYNC_TX_PQ_ZERO_{P,Q} isn't set are:\n" "\n" - "(a) async_pq can't process the sources in one short because of src_cnt > \n" - "max_pq, so it should re-use the intermediate results (destination) as \n" + "(a) async_pq can't process the sources in one short because of src_cnt >=20\n" + "max_pq, so it should re-use the intermediate results (destination) as=20\n" "the sources;\n" "\n" - "(b) async_r6_dd_recov() does XOR with async_pq() assuming re-using the \n" + "(b) async_r6_dd_recov() does XOR with async_pq() assuming re-using the=20\n" "destination as the source.\n" "\n" "\n" - " So, I would say that ASYNC_TX_PQ_ZERO_{P,Q} should definitely go \n" - "away, if there were no significant overheads in (a) implemented \n" + " So, I would say that ASYNC_TX_PQ_ZERO_{P,Q} should definitely go=20\n" + "away, if there were no significant overheads in (a) implemented=20\n" "without these flags (see above).\n" "\n" ">> +\n" ">> +/**\n" - ">> + * async_pq - attempt to do XOR and Galois calculations in parallel using\n" + ">> + * async_pq - attempt to do XOR and Galois calculations in parallel usi=\n" + "ng\n" ">> + * a dma engine.\n" - ">> + * @blocks: source block array from 0 to (src_cnt-1) with the p destination\n" + ">> + * @blocks: source block array from 0 to (src_cnt-1) with the p destina=\n" + "tion\n" ">> + * at blocks[src_cnt] and q at blocks[src_cnt + 1]. Only one of two\n" ">> + * destinations may be present (another then has to be set to NULL).\n" ">> + * By default, the result of calculations is XOR-ed with the initial\n" ">> + * content of the destinationa buffers. Use ASYNC_TX_PQ_ZERO_x flags\n" ">> + * to avoid this.\n" - ">> + * NOTE: client code must assume the contents of this array are destroyed\n" + ">> + * NOTE: client code must assume the contents of this array are des=\n" + "troyed\n" ">> + * @scfs: array of source coefficients used in GF-multiplication\n" ">> + * @offset: offset in pages to start transaction\n" ">> + * @src_cnt: number of source pages\n" ">> + * @len: length in bytes\n" - ">> + * @flags: ASYNC_TX_PQ_ZERO_P, ASYNC_TX_PQ_ZERO_Q, ASYNC_TX_ASSUME_COHERENT,\n" + ">> + * @flags: ASYNC_TX_PQ_ZERO_P, ASYNC_TX_PQ_ZERO_Q, ASYNC_TX_ASSUME_COHE=\n" + "RENT,\n" ">> + * ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, ASYNC_TX_ASYNC_ONLY\n" ">> + * @depend_tx: depends on the result of this transaction.\n" ">> + * @cb_fn: function to call when the operation completes\n" @@ -305,52 +325,59 @@ ">> + struct dma_async_tx_descriptor *depend_tx,\n" ">> + dma_async_tx_callback cb_fn, void *cb_param)\n" ">> +{\n" - ">> + struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_PQ,\n" + ">> + struct dma_chan *chan =3D async_tx_find_channel(depend_tx, DMA_P=\n" + "Q,\n" ">> + &blocks[src_cnt], 2,\n" ">> + blocks, src_cnt, len);\n" - ">> + struct dma_device *device = chan ? chan->device : NULL;\n" - ">> + struct dma_async_tx_descriptor *tx = NULL;\n" + ">> + struct dma_device *device =3D chan ? chan->device : NULL;\n" + ">> + struct dma_async_tx_descriptor *tx =3D NULL;\n" ">> +\n" ">> + if (!device && (flags & ASYNC_TX_ASYNC_ONLY))\n" ">> + return NULL;\n" ">> +\n" ">> + if (device) {\n" ">> + /* run pq asynchronously */\n" - ">> + tx = do_async_pq(chan, blocks, scfs, offset, src_cnt,\n" + ">> + tx =3D do_async_pq(chan, blocks, scfs, offset, src_cnt,\n" ">> + len, flags, depend_tx, cb_fn,cb_param);\n" ">> + } else {\n" ">> + /* run pq synchronously */\n" ">> + if (!blocks[src_cnt+1]) {\n" - ">> + struct page *pdst = blocks[src_cnt];\n" + ">> + struct page *pdst =3D blocks[src_cnt];\n" ">> + int i;\n" ">> +\n" ">> + /* Calculate P-parity only.\n" ">> + * As opposite to async_xor(), async_pq() assumes\n" - ">> + * that destinations are included into calculations,\n" + ">> + * that destinations are included into calculati=\n" + "ons,\n" ">> + * so we should re-arrange the xor src list to\n" ">> + * achieve the similar behavior.\n" ">> + */\n" ">> + if (!(flags & ASYNC_TX_PQ_ZERO_P)) {\n" - ">> + /* If async_pq() user doesn't set ZERO flag,\n" + ">> + /* If async_pq() user doesn't set ZERO f=\n" + "lag,\n" ">> + * it's assumed that destination has some\n" - ">> + * reasonable data to include in calculations.\n" - ">> + * The destination must be at position 0, so\n" + ">> + * reasonable data to include in calcula=\n" + "tions.\n" + ">> + * The destination must be at position 0=\n" + ", so\n" ">> + * shift the sources and put pdst at the\n" ">> + * beginning of the list.\n" ">> + */\n" - ">> + for (i = src_cnt - 1; i >= 0; i--)\n" - ">> + blocks[i+1] = blocks[i];\n" - ">> + blocks[0] = pdst;\n" + ">> + for (i =3D src_cnt - 1; i >=3D 0; i--)\n" + ">> + blocks[i+1] =3D blocks[i];\n" + ">> + blocks[0] =3D pdst;\n" ">> + src_cnt++;\n" - ">> + flags |= ASYNC_TX_XOR_DROP_DST;\n" + ">> + flags |=3D ASYNC_TX_XOR_DROP_DST;\n" ">> + } else {\n" - ">> + /* If async_pq() user want to clear P, then\n" - ">> + * this will be done automatically in async\n" + ">> + /* If async_pq() user want to clear P, t=\n" + "hen\n" + ">> + * this will be done automatically in as=\n" + "ync\n" ">> + * case, and with the help of ZERO_DST in\n" ">> + * the sync one.\n" ">> + */\n" - ">> + flags &= ~ASYNC_TX_PQ_ZERO_P;\n" - ">> + flags |= ASYNC_TX_XOR_ZERO_DST;\n" + ">> + flags &=3D ~ASYNC_TX_PQ_ZERO_P;\n" + ">> + flags |=3D ASYNC_TX_XOR_ZERO_DST;\n" ">> + }\n" ">> +\n" ">> + return async_xor(pdst, blocks, offset,\n" @@ -369,8 +396,10 @@ "\n" ">> @@ -81,14 +81,28 @@ enum dma_transaction_type {\n" ">> * dependency chains\n" - ">> * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)\n" - ">> * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)\n" + ">> * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source b=\n" + "uffer(s)\n" + ">> * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destina=\n" + "tion(s)\n" ">> + * @DMA_PREP_HAVE_P - set if the destination list includes the correct\n" ">> + * address of P (P-parity should be handled)\n" ">> + * @DMA_PREP_HAVE_Q - set if the destination list includes the correct\n" @@ -379,15 +408,15 @@ ">> + * @DMA_PREP_ZERO_Q - set if Q has to be zeroed before proceeding\n" ">> */\n" ">> enum dma_ctrl_flags {\n" - ">> DMA_PREP_INTERRUPT = (1 << 0),\n" - ">> DMA_CTRL_ACK = (1 << 1),\n" - ">> DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2),\n" - ">> DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),\n" + ">> DMA_PREP_INTERRUPT =3D (1 << 0),\n" + ">> DMA_CTRL_ACK =3D (1 << 1),\n" + ">> DMA_COMPL_SKIP_SRC_UNMAP =3D (1 << 2),\n" + ">> DMA_COMPL_SKIP_DEST_UNMAP =3D (1 << 3),\n" ">> +\n" - ">> + DMA_PREP_HAVE_P = (1 << 4),\n" - ">> + DMA_PREP_HAVE_Q = (1 << 5),\n" - ">> + DMA_PREP_ZERO_P = (1 << 6),\n" - ">> + DMA_PREP_ZERO_Q = (1 << 7),\n" + ">> + DMA_PREP_HAVE_P =3D (1 << 4),\n" + ">> + DMA_PREP_HAVE_Q =3D (1 << 5),\n" + ">> + DMA_PREP_ZERO_P =3D (1 << 6),\n" + ">> + DMA_PREP_ZERO_Q =3D (1 << 7),\n" ">> };\n" ">>\n" ">> +#define DMA_PCHECK_FAILED (1 << 0)\n" @@ -404,4 +433,4 @@ " Yuri Tikhonov, Senior Software Engineer\n" Emcraft Systems, www.emcraft.com -3bc740d2b99bd8721cfc2c1bb4cd054b7047823538a26b2ac1621968b39830f6 +01f92c8ef0789e915350bb1da606d02d6a11d2dd23ce74b92086b1e283f3978a
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.