Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
refactor: update implementation for sspmv
  • Loading branch information
aman-095 committed Aug 25, 2024
commit 0d71234301ca239a989d749741055c4f52622692
139 changes: 139 additions & 0 deletions lib/node_modules/@stdlib/blas/base/sspmv/lib/base.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/**
* @license Apache-2.0
*
* Copyright (c) 2024 The Stdlib Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

// MODULES //

var sfill = require( '@stdlib/blas/ext/base/sfill' ).ndarray;
var sscal = require( '@stdlib/blas/base/sscal' ).ndarray;
var f32 = require( '@stdlib/number/float64/base/to-float32' );


// MAIN //

/**
* Performs the matrix-vector operation `y = α*A*x + β*y` where `α` and `β` are scalars, `x` and `y` are `N` element vectors, and `A` is an `N` by `N` symmetric matrix supplied in packed form.
*
* @private
* @param {string} order - storage layout
* @param {string} uplo - specifies whether the upper or lower triangular part of the symmetric matrix `A` is supplied
* @param {NonNegativeInteger} N - number of elements along each dimension of `A`
* @param {number} alpha - scalar constant
* @param {Float32Array} AP - packed form of a symmetric matrix `A`
* @param {integer} strideAP - `AP` stride length
* @param {NonNegativeInteger} offsetAP - starting index for `AP`
* @param {Float32Array} x - first input array
* @param {integer} strideX - `x` stride length
* @param {NonNegativeInteger} offsetX - starting `x` index
* @param {number} beta - scalar constant
* @param {Float32Array} y - second input array
* @param {integer} strideY - `y` stride length
* @param {NonNegativeInteger} offsetY - starting `y` index
* @returns {Float32Array} `y`
*
* @example
* var Float32Array = require( '@stdlib/array/float32' );
*
* var AP = new Float32Array( [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 ] );
* var x = new Float32Array( [ 1.0, 1.0, 1.0 ] );
* var y = new Float32Array( [ 1.0, 1.0, 1.0 ] );
*
* sspmv( 'column-major', 'lower', 3, 1.0, AP, 1, 0, x, 1, 0, 1.0, y, 1, 0 );
* // y => <Float32Array>[ ~7.0, ~12.0, ~15.0 ]
*/
function sspmv( order, uplo, N, alpha, AP, strideAP, offsetAP, x, strideX, offsetX, beta, y, strideY, offsetY ) { // eslint-disable-line max-params, max-len
var isrm;
var tmp1;
var tmp2;
var iap;
var ix0;
var ix1;
var iy0;
var iy1;
var i1;
var i0;
var kk;
var ox;
var oy;

if ( beta !== 1.0 ) {
if ( beta === 0.0 ) {
sfill( N, beta, y, strideY, offsetY );
} else {
sscal( N, beta, y, strideY, offsetY );
}
}
if ( alpha === 0.0 ) {
return y;
}
isrm = ( order === 'row-major' );
kk = offsetAP;
ox = offsetX;
oy = offsetY;
if ( ( !isrm && uplo === 'upper' ) || ( isrm && uplo === 'lower' ) ) {
ix1 = ox;
iy1 = oy;
for ( i1 = 0; i1 < N; i1++ ) {
tmp1 = f32( alpha * x[ ix1 ] );
tmp2 = 0.0;
ix0 = ox;
iy0 = oy;
iap = kk;
for ( i0 = 0; i0 < i1; i0++ ) {
y[ iy0 ] += tmp1 * AP[ iap ];
tmp2 += AP[ iap ] * x[ ix0 ];
ix0 += strideX;
iy0 += strideY;
iap += strideAP;
}
y[ iy1 ] += f32( f32( tmp1 * AP[ iap ] ) + f32( alpha * tmp2 ) );
ix1 += strideX;
iy1 += strideY;
kk += ( i1 + 1 ) * strideAP;
}
return y;
}
ix1 = ox;
iy1 = oy;
for ( i1 = 0; i1 < N; i1++ ) {
tmp1 = f32( alpha * x[ ix1 ] );
tmp2 = 0.0;
iap = kk;
y[ iy1 ] += f32( tmp1 * AP[ iap ] );
ix0 = ix1;
iy0 = iy1;
for ( i0 = i1+1; i0 < N; i0++ ) {
ix0 += strideX;
iy0 += strideY;
iap += strideAP;
y[ iy0 ] += tmp1 * AP[ iap ];
tmp2 += AP[ iap ] * x[ ix0 ];
}
y[ iy1 ] += f32( alpha * tmp2 );
ix1 += strideX;
iy1 += strideY;
kk += ( N - i1 ) * strideAP;
}
return y;
}


// EXPORTS //

module.exports = sspmv;
4 changes: 2 additions & 2 deletions lib/node_modules/@stdlib/blas/base/sspmv/lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
'use strict';

/**
* BLAS level 2 routine to perform the matrix-vector operation `y = alpha*A*x + beta*y` where `alpha` and `beta` are scalars, `x` and `y` are `N` element vectors, and `A` is an `N` by `N` symmetric matrix supplied in packed form.
* BLAS level 2 routine to perform the matrix-vector operation `y = α*A*x + β*y` where `α` and `β` are scalars, `x` and `y` are `N` element vectors, and `A` is an `N` by `N` symmetric matrix supplied in packed form.
*
* @module @stdlib/blas/base/sspmv
*
Expand All @@ -42,7 +42,7 @@
* var x = new Float32Array( [ 1.0, 1.0, 1.0 ] );
* var y = new Float32Array( [ 1.0, 1.0, 1.0 ] );
*
* sspmv.ndarray( 'column-major', 'lower', 3, 1.0, AP, x, 1, 0, 1.0, y, 1, 0 );
* sspmv.ndarray( 'column-major', 'lower', 3, 1.0, AP, 1, 0, x, 1, 0, 1.0, y, 1, 0 );
* // y => <Float32Array>[ ~7.0, ~12.0, ~15.0 ]
*/

Expand Down
106 changes: 20 additions & 86 deletions lib/node_modules/@stdlib/blas/base/sspmv/lib/ndarray.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,24 @@

// MODULES //

var sfill = require( '@stdlib/blas/ext/base/sfill' ).ndarray;
var sscal = require( '@stdlib/blas/base/sscal' ).ndarray;
var f32 = require( '@stdlib/number/float64/base/to-float32' );
var isLayout = require( '@stdlib/blas/base/assert/is-layout' );
var isMatrixTriangle = require( '@stdlib/blas/base/assert/is-matrix-triangle' );
var format = require( '@stdlib/string/format' );
var base = require( './base.js' );


// MAIN //

/**
* Performs the matrix-vector operation `y = alpha*A*x + beta*y` where `alpha` and `beta` are scalars, `x` and `y` are `N` element vectors, and `A` is an `N` by `N` symmetric matrix supplied in packed form.
* Performs the matrix-vector operation `y = α*A*x + β*y` where `α` and `β` are scalars, `x` and `y` are `N` element vectors, and `A` is an `N` by `N` symmetric matrix supplied in packed form.
*
* @param {string} order - storage layout
* @param {string} uplo - specifies whether the upper or lower triangular part of the symmetric matrix `A` is supplied
* @param {NonNegativeInteger} N - number of elements along each dimension of `A`
* @param {number} alpha - scalar constant
* @param {Float32Array} AP - packed form of a symmetric matrix `A`
* @param {integer} strideAP - `AP` stride length
* @param {NonNegativeInteger} offsetAP - starting index for `AP`
* @param {Float32Array} x - first input array
* @param {integer} strideX - `x` stride length
* @param {NonNegativeInteger} offsetX - starting `x` index
Expand All @@ -47,111 +48,44 @@ var isMatrixTriangle = require( '@stdlib/blas/base/assert/is-matrix-triangle' );
* @throws {TypeError} first argument must be a valid order
* @throws {TypeError} second argument must specify whether the lower or upper triangular matrix is supplied
* @throws {RangeError} third argument must be a nonnegative integer
* @throws {RangeError} seventh argument must be non-zero
* @throws {RangeError} eleventh argument must be non-zero
* @throws {RangeError} sixth argument must be non-zero
* @throws {RangeError} ninth argument must be non-zero
* @throws {RangeError} thirteenth argument must be non-zero
* @returns {Float32Array} `y`
*
* @example
* var Float32Array = require( '@stdlib/array/float32' );
*
* var A = new Float32Array( [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 ] );
* var AP = new Float32Array( [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 ] );
* var x = new Float32Array( [ 1.0, 1.0, 1.0 ] );
* var y = new Float32Array( [ 1.0, 1.0, 1.0 ] );
*
* sspmv( 'column-major', 'lower', 3, 1.0, A, x, 1, 0, 1.0, y, 1, 0 );
* sspmv( 'column-major', 'lower', 3, 1.0, AP, 1, 0, x, 1, 0, 1.0, y, 1, 0 );
* // y => <Float32Array>[ ~7.0, ~12.0, ~15.0 ]
*/
function sspmv( order, uplo, N, alpha, AP, x, strideX, offsetX, beta, y, strideY, offsetY ) { // eslint-disable-line max-params, max-len
var temp1;
var temp2;
var ix;
var iy;
var jx;
var jy;
var kk;
var kx;
var ky;
var j;
var k;

function sspmv( order, uplo, N, alpha, AP, strideAP, offsetAP, x, strideX, offsetX, beta, y, strideY, offsetY ) { // eslint-disable-line max-params, max-len
if ( !isLayout( order ) ) {
throw new TypeError( 'invalid argument. First argument must be a valid order. Value: `%s`.', order );
throw new TypeError( format( 'invalid argument. First argument must be a valid order. Value: `%s`.', order ) );
}
if ( !isMatrixTriangle( uplo ) ) {
throw new TypeError( 'invalid argument. Second argument must specify whether the lower or upper triangular matrix is supplied. Value: `%s`.', uplo );
throw new TypeError( format( 'invalid argument. Second argument must specify whether the lower or upper triangular matrix is supplied. Value: `%s`.', uplo ) );
}
if ( N < 0 ) {
throw new RangeError( 'invalid argument. Third argument must be a nonnegative integer. Value: `%d`.', N );
throw new RangeError( format( 'invalid argument. Third argument must be a nonnegative integer. Value: `%d`.', N ) );
}
if ( strideAP === 0 ) {
throw new RangeError( format( 'invalid argument. Sixth argument must be non-zero. Value: `%d`.', strideAP ) );
}
if ( strideX === 0 ) {
throw new RangeError( 'invalid argument. Seventh argument must be non-zero. Value: `%d`.', strideX );
throw new RangeError( format( 'invalid argument. Ninth argument must be non-zero. Value: `%d`.', strideX ) );
}
if ( strideY === 0 ) {
throw new RangeError( 'invalid argument. Eleventh argument must be non-zero. Value: `%d`.', strideY );
throw new RangeError( format( 'invalid argument. Thirteenth argument must be non-zero. Value: `%d`.', strideY ) );
}
if ( N === 0 || ( alpha === 0.0 && beta === 1.0 ) ) {
return y;
}
// Form: y = beta*y
if ( beta !== 1.0 ) {
if ( beta === 0.0 ) {
sfill( N, 0.0, y, strideY, offsetY );
} else {
sscal( N, beta, y, strideY, offsetY );
}
}
if ( alpha === 0.0 ) {
return y;
}
// Form: y = alpha*A*x + y
kx = offsetX;
ky = offsetY;
kk = 0;
if (
( order === 'row-major' && uplo === 'upper' ) ||
( order === 'column-major' && uplo === 'lower' )
) {
jx = kx;
jy = ky;
for ( j = 0; j < N; j++ ) {
temp1 = f32( alpha * x[ jx ] );
temp2 = 0.0;
y[ jy ] += f32( temp1 * AP[ kk ] );
ix = jx;
iy = jy;
for ( k = kk + 1; k < kk + N - j; k++ ) {
ix += strideX;
iy += strideY;
y[ iy ] += f32( temp1 * AP[ k ] );
temp2 += f32( AP[ k ] * x[ ix ] );
}
y[ jy ] += f32( alpha * temp2 );
jx += strideX;
jy += strideY;
kk += N - j;
}
return y;
}
// ( order === 'row-major' && uplo === 'lower') || ( order === 'column-major' && uplo === 'upper' )
jx = kx;
jy = ky;
for ( j = 0; j < N; j++ ) {
temp1 = f32( alpha * x[ jx ] );
temp2 = 0.0;
ix = kx;
iy = ky;
for ( k = kk; k < kk + j; k++ ) {
y[ iy ] += f32( temp1 * AP[ k ] );
temp2 += f32( AP[ k ] * x[ ix ] );
ix += strideX;
iy += strideY;
}
y[ jy ] += f32( f32( temp1 * AP[ kk + j ] ) + f32( alpha * temp2 ) );
jx += strideX;
jy += strideY;
kk += j + 1;
}
return y;
return base( order, uplo, N, alpha, AP, strideAP, offsetAP, x, strideX, offsetX, beta, y, strideY, offsetY );
}


Expand Down
Loading