@@ -8,16 +8,16 @@ package blob
88
99import (
1010 "context"
11- "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
12- "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
1311 "io"
1412 "os"
1513 "sync"
1614 "time"
1715
1816 "github.com/Azure/azure-sdk-for-go/sdk/azcore"
17+ "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
1918 "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
2019 "github.com/Azure/azure-sdk-for-go/sdk/azcore/streaming"
20+ "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
2121 "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/internal/base"
2222 "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/internal/exported"
2323 "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/internal/generated"
@@ -324,8 +324,8 @@ func (b *Client) GetSASURL(permissions sas.BlobPermissions, expiry time.Time, o
324324
325325// Concurrent Download Functions -----------------------------------------------------------------------------------------
326326
327- // download downloads an Azure blob to a WriterAt in parallel.
328- func (b * Client ) download (ctx context.Context , writer io.WriterAt , o downloadOptions ) (int64 , error ) {
327+ // downloadBuffer downloads an Azure blob to a WriterAt in parallel.
328+ func (b * Client ) downloadBuffer (ctx context.Context , writer io.WriterAt , o downloadOptions ) (int64 , error ) {
329329 if o .BlockSize == 0 {
330330 o .BlockSize = DefaultDownloadBlockSize
331331 }
@@ -353,6 +353,7 @@ func (b *Client) download(ctx context.Context, writer io.WriterAt, o downloadOpt
353353 OperationName : "downloadBlobToWriterAt" ,
354354 TransferSize : count ,
355355 ChunkSize : o .BlockSize ,
356+ NumChunks : uint16 (((count - 1 ) / o .BlockSize ) + 1 ),
356357 Concurrency : o .Concurrency ,
357358 Operation : func (ctx context.Context , chunkStart int64 , count int64 ) error {
358359 downloadBlobOptions := o .getDownloadBlobOptions (HTTPRange {
@@ -391,6 +392,168 @@ func (b *Client) download(ctx context.Context, writer io.WriterAt, o downloadOpt
391392 return count , nil
392393}
393394
395+ // downloadFile downloads an Azure blob to a Writer. The blocks are downloaded parallely,
396+ // but written to file serially
397+ func (b * Client ) downloadFile (ctx context.Context , writer io.Writer , o downloadOptions ) (int64 , error ) {
398+ ctx , cancel := context .WithCancel (ctx )
399+ defer cancel ()
400+ if o .BlockSize == 0 {
401+ o .BlockSize = DefaultDownloadBlockSize
402+ }
403+
404+ if o .Concurrency == 0 {
405+ o .Concurrency = DefaultConcurrency
406+ }
407+
408+ count := o .Range .Count
409+ if count == CountToEnd { //Calculate size if not specified
410+ gr , err := b .GetProperties (ctx , o .getBlobPropertiesOptions ())
411+ if err != nil {
412+ return 0 , err
413+ }
414+ count = * gr .ContentLength - o .Range .Offset
415+ }
416+
417+ if count <= 0 {
418+ // The file is empty, there is nothing to download.
419+ return 0 , nil
420+ }
421+
422+ progress := int64 (0 )
423+ progressLock := & sync.Mutex {}
424+
425+ // helper routine to get body
426+ getBodyForRange := func (ctx context.Context , chunkStart , size int64 ) (io.ReadCloser , error ) {
427+ downloadBlobOptions := o .getDownloadBlobOptions (HTTPRange {
428+ Offset : chunkStart + o .Range .Offset ,
429+ Count : size ,
430+ }, nil )
431+ dr , err := b .DownloadStream (ctx , downloadBlobOptions )
432+ if err != nil {
433+ return nil , err
434+ }
435+
436+ var body io.ReadCloser = dr .NewRetryReader (ctx , & o .RetryReaderOptionsPerBlock )
437+ if o .Progress != nil {
438+ rangeProgress := int64 (0 )
439+ body = streaming .NewResponseProgress (
440+ body ,
441+ func (bytesTransferred int64 ) {
442+ diff := bytesTransferred - rangeProgress
443+ rangeProgress = bytesTransferred
444+ progressLock .Lock ()
445+ progress += diff
446+ o .Progress (progress )
447+ progressLock .Unlock ()
448+ })
449+ }
450+
451+ return body , nil
452+ }
453+
454+ // if file fits in a single buffer, we'll download here.
455+ if count <= o .BlockSize {
456+ body , err := getBodyForRange (ctx , int64 (0 ), count )
457+ if err != nil {
458+ return 0 , err
459+ }
460+ defer body .Close ()
461+
462+ return io .Copy (writer , body )
463+ }
464+
465+ buffers := shared .NewMMBPool (int (o .Concurrency ), o .BlockSize )
466+ defer buffers .Free ()
467+ aquireBuffer := func () ([]byte , error ) {
468+ select {
469+ case b := <- buffers .Acquire ():
470+ // got a buffer
471+ return b , nil
472+ default :
473+ // no buffer available; allocate a new buffer if possible
474+ if _ , err := buffers .Grow (); err != nil {
475+ return nil , err
476+ }
477+
478+ // either grab the newly allocated buffer or wait for one to become available
479+ return <- buffers .Acquire (), nil
480+ }
481+ }
482+
483+ numChunks := uint16 ((count - 1 )/ o .BlockSize ) + 1
484+ blocks := make ([]chan []byte , numChunks )
485+ for b := range blocks {
486+ blocks [b ] = make (chan []byte )
487+ }
488+
489+ /*
490+ * We have created as many channels as the number of chunks we have.
491+ * Each downloaded block will be sent to the channel matching its
492+ * sequece number, i.e. 0th block is sent to 0th channel, 1st block
493+ * to 1st channel and likewise. The blocks are then read and written
494+ * to the file serially by below goroutine. Do note that the blocks
495+ * blocks are still downloaded parallelly from n/w, only serailized
496+ * and written to file here.
497+ */
498+ writerError := make (chan error )
499+ go func (ch chan error ) {
500+ for _ , block := range blocks {
501+ select {
502+ case <- ctx .Done ():
503+ return
504+ case block := <- block :
505+ _ , err := writer .Write (block )
506+ buffers .Release (block )
507+ if err != nil {
508+ ch <- err
509+ return
510+ }
511+ }
512+ }
513+ ch <- nil
514+ }(writerError )
515+
516+ // Prepare and do parallel download.
517+ err := shared .DoBatchTransfer (ctx , & shared.BatchTransferOptions {
518+ OperationName : "downloadBlobToWriterAt" ,
519+ TransferSize : count ,
520+ ChunkSize : o .BlockSize ,
521+ NumChunks : numChunks ,
522+ Concurrency : o .Concurrency ,
523+ Operation : func (ctx context.Context , chunkStart int64 , count int64 ) error {
524+ buff , err := aquireBuffer ()
525+ if err != nil {
526+ return err
527+ }
528+
529+ body , err := getBodyForRange (ctx , chunkStart , count )
530+ if err != nil {
531+ buffers .Release (buff )
532+ return nil
533+ }
534+
535+ _ , err = io .ReadFull (body , buff [:count ])
536+ body .Close ()
537+ if err != nil {
538+ return err
539+ }
540+
541+ blockIndex := (chunkStart / o .BlockSize )
542+ blocks [blockIndex ] <- buff
543+ return nil
544+ },
545+ })
546+
547+ if err != nil {
548+ return 0 , err
549+ }
550+ // error from writer thread.
551+ if err = <- writerError ; err != nil {
552+ return 0 , err
553+ }
554+ return count , nil
555+ }
556+
394557// DownloadStream reads a range of bytes from a blob. The response also includes the blob's properties and metadata.
395558// For more information, see https://docs.microsoft.com/rest/api/storageservices/get-blob.
396559func (b * Client ) DownloadStream (ctx context.Context , o * DownloadStreamOptions ) (DownloadStreamResponse , error ) {
@@ -419,7 +582,7 @@ func (b *Client) DownloadBuffer(ctx context.Context, buffer []byte, o *DownloadB
419582 if o == nil {
420583 o = & DownloadBufferOptions {}
421584 }
422- return b .download (ctx , shared .NewBytesWriter (buffer ), (downloadOptions )(* o ))
585+ return b .downloadBuffer (ctx , shared .NewBytesWriter (buffer ), (downloadOptions )(* o ))
423586}
424587
425588// DownloadFile downloads an Azure blob to a local file.
@@ -458,7 +621,7 @@ func (b *Client) DownloadFile(ctx context.Context, file *os.File, o *DownloadFil
458621 }
459622
460623 if size > 0 {
461- return b .download (ctx , file , * do )
624+ return b .downloadFile (ctx , file , * do )
462625 } else { // if the blob's size is 0, there is no need in downloading it
463626 return 0 , nil
464627 }
0 commit comments