Skip to content
This repository was archived by the owner on Jun 8, 2023. It is now read-only.

Conversation

@pramodk
Copy link
Collaborator

@pramodk pramodk commented Apr 3, 2022

  • all global variables are now wrapped into
    struct called _GlobalVars.
  • previous global variables are still printed
    but their values is now copied into _GlobalVars
    during nrn_init.
  • variables like slist and dlist are now removed
  • we now only copy _GlobalVars into GPU; all other
    acc copyin / update for global variables is now
    removed.
  • we now print all global variables as static and
    remove all their macro renaming.

Todos:

  • basic implementation / draft removing all global usage
  • run neuron test suite on CPU
  • run neuron test suite on GPU
  • fix TODOs
  • code cleanup including memory cleanup

Related to BlueBrain/CoreNeuron/issues/141

@pramodk
Copy link
Collaborator Author

pramodk commented Apr 3, 2022

@nrnhines: the changes in the test directory gives an idea of how generated code looks like. All coreneuron tests under NEURON are passing on CPU which means there are "obvious" issues. As this code is still C, I went with the existing style and with minimal changes as part of this PR.

cc: @olupton

May be you can take a look at the changes and generated code carefully and see if there are pitfalls? I compiled MOD files from https://github.com/neuronsimulator/testcorenrn and here is difference in .cpp files for master branch vs. this PR:

diff --git a/mod/Gfluct3.cpp b/mod/Gfluct3.cpp
index 4bc3bef..12ebc93 100644
--- a/mod/Gfluct3.cpp
+++ b/mod/Gfluct3.cpp
@@ -177,10 +177,7 @@ void _net_buf_receive(NrnThread*);
  static double _hoc_oup();

 #endif /*BBCORE*/
-
-#define _mechtype _mechtype_Gfluct3
-int _mechtype;
-#pragma acc declare copyin (_mechtype)
+ static int _mechtype;
  static int _pointtype;

 #if 0 /*BBCORE*/
@@ -227,10 +224,6 @@ int _mechtype;
  inline double mynormrand( _threadargsprotocomma_ double , double );
  /* declare global and static user variables */

-static void _acc_globals_update() {
- }
-
-
 #if 0 /*BBCORE*/
  /* some parameters have upper and lower limits */
  static HocParmLimits _hoc_parm_limits[] = {
@@ -369,6 +362,21 @@ static void nrn_alloc(double* _p, Datum* _ppvar, int _type) {
  	hoc_reg_ba(_mechtype, _ba1, 11);
  	hoc_register_var(hoc_scdoub, hoc_vdoub, NULL);
  }
+ struct _GlobalVars {
+   int _mechtype;
+ };
+
+ static _GlobalVars _global_variables;
+ static _GlobalVars* _global_variables_ptr;
+
+
+static void _update_global_variables() {
+   _global_variables._mechtype = _mechtype;
+   #pragma acc enter data copyin(_global_variables[0:1]) if(nrn_threads->compute_gpu)
+ }
+
+ #define _mechtype _global_variables_ptr->_mechtype
+
 static const char *modelname = "Fluctuating conductances";

 static int error;
@@ -874,8 +882,7 @@ double _v, v; int* _ni; int _iml, _cntml_padded, _cntml_actual;
 _cntml_actual = _ml->_nodecount;
 _cntml_padded = _ml->_nodecount_padded;
 _thread = _ml->_thread;
-  #pragma acc update device (_mechtype) if(_nt->compute_gpu)
-_acc_globals_update();
+_update_global_variables();
 double * _nt_data = _nt->_data;
 double * _vec_v = _nt->_actual_v;
 int stream_id = _nt->stream_id;
@@ -1019,6 +1026,7 @@ void nrn_state(NrnThread* _nt, Memb_list* _ml, int _type) {
 static void terminal(){}

 static void _initlists(){
+ _global_variables_ptr = &_global_variables;
  double _x; double* _p = &_x;
  int _i; static int _first = 1;
  int _cntml_actual=1;
diff --git a/mod/hhderiv.cpp b/mod/hhderiv.cpp
index 1245358..b420162 100644
--- a/mod/hhderiv.cpp
+++ b/mod/hhderiv.cpp
@@ -166,9 +166,6 @@
  static ThreadDatum* _extcall_thread;
  /* external NEURON variables */
  extern double celsius;
- #define _celsius_ _celsius__hhderiv
-double _celsius_;
-#pragma acc declare copyin(_celsius_)

 #if 0 /*BBCORE*/
  /* declaration of user functions */
@@ -193,13 +190,6 @@ double _celsius_;
  inline double vtrap( _threadargsprotocomma_ double , double );
  /* declare global and static user variables */

-static void _acc_globals_update() {
- _celsius_ = celsius;
- #pragma acc update device(_celsius_)
- }
-
- #define celsius _celsius_
-
 #if 0 /*BBCORE*/
  /* some parameters have upper and lower limits */
  static HocParmLimits _hoc_parm_limits[] = {
@@ -224,13 +214,9 @@ static void _acc_globals_update() {

 #endif /*BBCORE*/
  static double delta_t = 0.01;
-#pragma acc declare copyin(delta_t)
  static double h0 = 0;
-#pragma acc declare copyin(h0)
  static double m0 = 0;
-#pragma acc declare copyin(m0)
  static double n0 = 0;
-#pragma acc declare copyin(n0)
  /* connect global user variables to hoc */
  static DoubScal hoc_scdoub[] = {
  0,0
@@ -347,6 +333,42 @@ static void nrn_alloc(double* _p, Datum* _ppvar, int _type) {
   hoc_register_dparam_semantics(_mechtype, 5, "k_ion");
  	hoc_register_var(hoc_scdoub, hoc_vdoub, NULL);
  }
+ struct _GlobalVars {
+   int _slist1[3];
+   int _dlist1[3];
+   int _slist2[3];
+   double celsius;
+   int _mechtype;
+   double delta_t;
+   double h0;
+   double m0;
+   double n0;
+ };
+
+ static _GlobalVars _global_variables;
+ static _GlobalVars* _global_variables_ptr;
+
+
+static void _update_global_variables() {
+   _global_variables.celsius = celsius;
+   _global_variables._mechtype = _mechtype;
+   _global_variables.delta_t = delta_t;
+   _global_variables.h0 = h0;
+   _global_variables.m0 = m0;
+   _global_variables.n0 = n0;
+   #pragma acc enter data copyin(_global_variables[0:1]) if(nrn_threads->compute_gpu)
+ }
+
+ #define _slist1 _global_variables_ptr->_slist1
+ #define _dlist1 _global_variables_ptr->_dlist1
+ #define _slist2 _global_variables_ptr->_slist2
+ #define celsius _global_variables_ptr->celsius
+ #define _mechtype _global_variables_ptr->_mechtype
+ #define delta_t _global_variables_ptr->delta_t
+ #define h0 _global_variables_ptr->h0
+ #define m0 _global_variables_ptr->m0
+ #define n0 _global_variables_ptr->n0
+
 static const char *modelname = "hh.mod   squid sodium, potassium, and leak channels";

 static int error;
@@ -367,19 +389,7 @@ static int _ode_spec1(_threadargsproto_);
 #define INSIDE_NMODL
 #endif
  int _newton_states_hhderiv(_threadargsproto_);
-
-#define _slist2 _slist2_hhderiv
-int* _slist2;
-#pragma acc declare create(_slist2)
-
-#define _slist1 _slist1_hhderiv
-int* _slist1;
-#pragma acc declare create(_slist1)
-
-#define _dlist1 _dlist1_hhderiv
-int* _dlist1;
-#pragma acc declare create(_dlist1)
- extern int states(_threadargsproto_);
+  extern int states(_threadargsproto_);

 /*CVODE*/
  static int _ode_spec1 (_threadargsproto_) {int _reset = 0; {
@@ -538,7 +548,7 @@ _thread = _ml->_thread;
     }
     #endif
   }
-_acc_globals_update();
+_update_global_variables();
 double * _nt_data = _nt->_data;
 double * _vec_v = _nt->_actual_v;
 int stream_id = _nt->stream_id;
@@ -701,27 +711,19 @@ for (;;) { /* help clang-format properly indent */
 static void terminal(){}

 static void _initlists(){
+ _global_variables_ptr = &_global_variables;
  double _x; double* _p = &_x;
  int _i; static int _first = 1;
  int _cntml_actual=1;
  int _cntml_padded=1;
  int _iml=0;
   if (!_first) return;
-
- _slist1 = (int*)malloc(sizeof(int)*3);
- _dlist1 = (int*)malloc(sizeof(int)*3);
  _slist1[0] = &(m) - _p;  _dlist1[0] = &(Dm) - _p;
  _slist1[1] = &(h) - _p;  _dlist1[1] = &(Dh) - _p;
  _slist1[2] = &(n) - _p;  _dlist1[2] = &(Dn) - _p;
- #pragma acc enter data copyin(_slist1[0:3])
- #pragma acc enter data copyin(_dlist1[0:3])
-
- _slist2 = (int*)malloc(sizeof(int)*3);
  _slist2[0] = &(h) - _p;
  _slist2[1] = &(m) - _p;
  _slist2[2] = &(n) - _p;
- #pragma acc enter data copyin(_slist2[0:3])
-
 _first = 0;
 }
 } // namespace coreneuron_lib
diff --git a/mod/hhkin.cpp b/mod/hhkin.cpp
index 28c17ab..8ba3023 100644
--- a/mod/hhkin.cpp
+++ b/mod/hhkin.cpp
@@ -172,9 +172,6 @@
  static ThreadDatum* _extcall_thread;
  /* external NEURON variables */
  extern double celsius;
- #define _celsius_ _celsius__hhkin
-double _celsius_;
-#pragma acc declare copyin(_celsius_)

 #if 0 /*BBCORE*/
  /* declaration of user functions */
@@ -199,13 +196,6 @@ double _celsius_;
  inline double vtrap( _threadargsprotocomma_ double , double );
  /* declare global and static user variables */

-static void _acc_globals_update() {
- _celsius_ = celsius;
- #pragma acc update device(_celsius_)
- }
-
- #define celsius _celsius_
-
 #if 0 /*BBCORE*/
  /* some parameters have upper and lower limits */
  static HocParmLimits _hoc_parm_limits[] = {
@@ -227,19 +217,12 @@ static void _acc_globals_update() {

 #endif /*BBCORE*/
  static double delta_t = 0.01;
-#pragma acc declare copyin(delta_t)
  static double hc0 = 0;
-#pragma acc declare copyin(hc0)
  static double h0 = 0;
-#pragma acc declare copyin(h0)
  static double mc0 = 0;
-#pragma acc declare copyin(mc0)
  static double m0 = 0;
-#pragma acc declare copyin(m0)
  static double nc0 = 0;
-#pragma acc declare copyin(nc0)
  static double n0 = 0;
-#pragma acc declare copyin(n0)
  /* connect global user variables to hoc */
  static DoubScal hoc_scdoub[] = {
  0,0
@@ -356,6 +339,49 @@ static void nrn_alloc(double* _p, Datum* _ppvar, int _type) {
   hoc_register_dparam_semantics(_mechtype, 5, "k_ion");
  	hoc_register_var(hoc_scdoub, hoc_vdoub, NULL);
  }
+ struct _GlobalVars {
+   int _slist1[6];
+   int _dlist1[6];
+   double celsius;
+   int _mechtype;
+   double delta_t;
+   double hc0;
+   double h0;
+   double mc0;
+   double m0;
+   double nc0;
+   double n0;
+ };
+
+ static _GlobalVars _global_variables;
+ static _GlobalVars* _global_variables_ptr;
+
+
+static void _update_global_variables() {
+   _global_variables.celsius = celsius;
+   _global_variables._mechtype = _mechtype;
+   _global_variables.delta_t = delta_t;
+   _global_variables.hc0 = hc0;
+   _global_variables.h0 = h0;
+   _global_variables.mc0 = mc0;
+   _global_variables.m0 = m0;
+   _global_variables.nc0 = nc0;
+   _global_variables.n0 = n0;
+   #pragma acc enter data copyin(_global_variables[0:1]) if(nrn_threads->compute_gpu)
+ }
+
+ #define _slist1 _global_variables_ptr->_slist1
+ #define _dlist1 _global_variables_ptr->_dlist1
+ #define celsius _global_variables_ptr->celsius
+ #define _mechtype _global_variables_ptr->_mechtype
+ #define delta_t _global_variables_ptr->delta_t
+ #define hc0 _global_variables_ptr->hc0
+ #define h0 _global_variables_ptr->h0
+ #define mc0 _global_variables_ptr->mc0
+ #define m0 _global_variables_ptr->m0
+ #define nc0 _global_variables_ptr->nc0
+ #define n0 _global_variables_ptr->n0
+
 static const char *modelname = "hh.mod   squid sodium, potassium, and leak channels";

 static int error;
@@ -375,14 +401,6 @@ static inline int rates(_threadargsprotocomma_ double);
 static int _ode_spec1(_threadargsproto_);
 /*static int _ode_matsol1(_threadargsproto_);*/

-#define _slist1 _slist1_hhkin
-int* _slist1;
-#pragma acc declare create(_slist1)
-
-#define _dlist1 _dlist1_hhkin
-int* _dlist1;
-#pragma acc declare create(_dlist1)
-
 /* _kinetic_ states _hhkin */
 #ifndef INSIDE_NMODL
 #define INSIDE_NMODL
@@ -605,7 +623,7 @@ _thread = _ml->_thread;
     }
     #endif
   }
-_acc_globals_update();
+_update_global_variables();
 double * _nt_data = _nt->_data;
 double * _vec_v = _nt->_actual_v;
 int stream_id = _nt->stream_id;
@@ -764,24 +782,19 @@ for (;;) { /* help clang-format properly indent */
 static void terminal(){}

 static void _initlists(){
+ _global_variables_ptr = &_global_variables;
  double _x; double* _p = &_x;
  int _i; static int _first = 1;
  int _cntml_actual=1;
  int _cntml_padded=1;
  int _iml=0;
   if (!_first) return;
-
- _slist1 = (int*)malloc(sizeof(int)*6);
- _dlist1 = (int*)malloc(sizeof(int)*6);
  _slist1[0] = &(hc) - _p;  _dlist1[0] = &(Dhc) - _p;
  _slist1[1] = &(h) - _p;  _dlist1[1] = &(Dh) - _p;
  _slist1[2] = &(mc) - _p;  _dlist1[2] = &(Dmc) - _p;
  _slist1[3] = &(m) - _p;  _dlist1[3] = &(Dm) - _p;
  _slist1[4] = &(nc) - _p;  _dlist1[4] = &(Dnc) - _p;
  _slist1[5] = &(n) - _p;  _dlist1[5] = &(Dn) - _p;
- #pragma acc enter data copyin(_slist1[0:6])
- #pragma acc enter data copyin(_dlist1[0:6])
-
 _first = 0;
 }
 } // namespace coreneuron_lib
diff --git a/mod/hhwatch.cpp b/mod/hhwatch.cpp
index ca4e029..ab4b717 100644
--- a/mod/hhwatch.cpp
+++ b/mod/hhwatch.cpp
@@ -145,10 +145,7 @@ void _net_buf_receive(NrnThread*);
  /* declaration of user functions */

 #endif /*BBCORE*/
-
-#define _mechtype _mechtype_hhwatch
-int _mechtype;
-#pragma acc declare copyin (_mechtype)
+ static int _mechtype;
  static int _pointtype;

 #if 0 /*BBCORE*/
@@ -182,34 +179,12 @@ int _mechtype;

 #endif /*BBCORE*/
  /* declare global and static user variables */
-#define erev erev_hhwatch
- double erev = -65;
- #pragma acc declare copyin (erev)
-#define ek ek_hhwatch
- double ek = -80;
- #pragma acc declare copyin (ek)
-#define ena ena_hhwatch
- double ena = 50;
- #pragma acc declare copyin (ena)
-#define gpas gpas_hhwatch
- double gpas = 0.0001;
- #pragma acc declare copyin (gpas)
-#define gk gk_hhwatch
- double gk = 0.03;
- #pragma acc declare copyin (gk)
-#define gna gna_hhwatch
- double gna = 0.1;
- #pragma acc declare copyin (gna)
-
-static void _acc_globals_update() {
- #pragma acc update device (erev) if(nrn_threads->compute_gpu)
- #pragma acc update device (ek) if(nrn_threads->compute_gpu)
- #pragma acc update device (ena) if(nrn_threads->compute_gpu)
- #pragma acc update device (gpas) if(nrn_threads->compute_gpu)
- #pragma acc update device (gk) if(nrn_threads->compute_gpu)
- #pragma acc update device (gna) if(nrn_threads->compute_gpu)
- }
-
+ static double erev = -65;
+ static double ek = -80;
+ static double ena = 50;
+ static double gpas = 0.0001;
+ static double gk = 0.03;
+ static double gna = 0.1;

 #if 0 /*BBCORE*/
  /* some parameters have upper and lower limits */
@@ -232,12 +207,12 @@ static void _acc_globals_update() {
 #endif /*BBCORE*/
  /* connect global user variables to hoc */
  static DoubScal hoc_scdoub[] = {
- "ena_hhwatch", &ena_hhwatch,
- "ek_hhwatch", &ek_hhwatch,
- "erev_hhwatch", &erev_hhwatch,
- "gna_hhwatch", &gna_hhwatch,
- "gk_hhwatch", &gk_hhwatch,
- "gpas_hhwatch", &gpas_hhwatch,
+ "ena_hhwatch", &ena,
+ "ek_hhwatch", &ek,
+ "erev_hhwatch", &erev,
+ "gna_hhwatch", &gna,
+ "gk_hhwatch", &gk,
+ "gpas_hhwatch", &gpas,
  0,0
 };
  static DoubVec hoc_vdoub[] = {
@@ -337,6 +312,39 @@ static void nrn_alloc(double* _p, Datum* _ppvar, int _type) {
  set_pnt_receive(_mechtype, _net_receive, nullptr, 1);
  	hoc_register_var(hoc_scdoub, hoc_vdoub, NULL);
  }
+ struct _GlobalVars {
+   int _mechtype;
+   double erev;
+   double ek;
+   double ena;
+   double gpas;
+   double gk;
+   double gna;
+ };
+
+ static _GlobalVars _global_variables;
+ static _GlobalVars* _global_variables_ptr;
+
+
+static void _update_global_variables() {
+   _global_variables._mechtype = _mechtype;
+   _global_variables.erev = erev;
+   _global_variables.ek = ek;
+   _global_variables.ena = ena;
+   _global_variables.gpas = gpas;
+   _global_variables.gk = gk;
+   _global_variables.gna = gna;
+   #pragma acc enter data copyin(_global_variables[0:1]) if(nrn_threads->compute_gpu)
+ }
+
+ #define _mechtype _global_variables_ptr->_mechtype
+ #define erev _global_variables_ptr->erev
+ #define ek _global_variables_ptr->ek
+ #define ena _global_variables_ptr->ena
+ #define gpas _global_variables_ptr->gpas
+ #define gk _global_variables_ptr->gk
+ #define gna _global_variables_ptr->gna
+
 static const char *modelname = "";

 static int error;
@@ -635,8 +643,7 @@ double _v, v; int* _ni; int _iml, _cntml_padded, _cntml_actual;
 _cntml_actual = _ml->_nodecount;
 _cntml_padded = _ml->_nodecount_padded;
 _thread = _ml->_thread;
-  #pragma acc update device (_mechtype) if(_nt->compute_gpu)
-_acc_globals_update();
+_update_global_variables();
 double * _nt_data = _nt->_data;
 double * _vec_v = _nt->_actual_v;
 int stream_id = _nt->stream_id;
@@ -787,6 +794,7 @@ void nrn_state(NrnThread* _nt, Memb_list* _ml, int _type) {
 static void terminal(){}

 static void _initlists(){
+ _global_variables_ptr = &_global_variables;
  double _x; double* _p = &_x;
  int _i; static int _first = 1;
  int _cntml_actual=1;
diff --git a/mod/nacum.cpp b/mod/nacum.cpp
index 507b1b9..5ef3bb4 100644
--- a/mod/nacum.cpp
+++ b/mod/nacum.cpp
@@ -136,8 +136,6 @@
 #endif
  static int hoc_nrnpointerindex =  -1;
  static ThreadDatum* _extcall_thread;
- #define FARADAY FARADAY_na
- #define R R_na
  /* external NEURON variables */

 #if 0 /*BBCORE*/
@@ -155,14 +153,7 @@

 #endif /*BBCORE*/
  /* declare global and static user variables */
-#define nabath nabath_na
- double nabath = 116;
- #pragma acc declare copyin (nabath)
-
-static void _acc_globals_update() {
- #pragma acc update device (nabath) if(nrn_threads->compute_gpu)
- }
-
+ static double nabath = 116;

 #if 0 /*BBCORE*/
  /* some parameters have upper and lower limits */
@@ -178,10 +169,9 @@ static void _acc_globals_update() {

 #endif /*BBCORE*/
  static double delta_t = 0.01;
-#pragma acc declare copyin(delta_t)
  /* connect global user variables to hoc */
  static DoubScal hoc_scdoub[] = {
- "nabath_na", &nabath_na,
+ "nabath_na", &nabath,
  0,0
 };
  static DoubVec hoc_vdoub[] = {
@@ -252,11 +242,40 @@ static void nrn_alloc(double* _p, Datum* _ppvar, int _type) {
  	hoc_register_var(hoc_scdoub, hoc_vdoub, NULL);
  }

-double FARADAY = 96520.0;
-#pragma acc declare copyin(FARADAY)
+static double FARADAY = 96520.0;
+
+static double R = 8.3134;
+ struct _GlobalVars {
+   double FARADAY;
+   double R;
+   int _slist1[2];
+   int _dlist1[2];
+   int _mechtype;
+   double nabath;
+   double delta_t;
+ };
+
+ static _GlobalVars _global_variables;
+ static _GlobalVars* _global_variables_ptr;
+
+
+static void _update_global_variables() {
+   _global_variables.FARADAY = FARADAY;
+   _global_variables.R = R;
+   _global_variables._mechtype = _mechtype;
+   _global_variables.nabath = nabath;
+   _global_variables.delta_t = delta_t;
+   #pragma acc enter data copyin(_global_variables[0:1]) if(nrn_threads->compute_gpu)
+ }
+
+ #define FARADAY _global_variables_ptr->FARADAY
+ #define R _global_variables_ptr->R
+ #define _slist1 _global_variables_ptr->_slist1
+ #define _dlist1 _global_variables_ptr->_dlist1
+ #define _mechtype _global_variables_ptr->_mechtype
+ #define nabath _global_variables_ptr->nabath
+ #define delta_t _global_variables_ptr->delta_t

-double R = 8.3134;
-#pragma acc declare copyin(R)
 static const char *modelname = "Sodium ion accumulation";

 static int error;
@@ -266,14 +285,6 @@ static void _modl_cleanup(){ _match_recurse=1;}

 static int _ode_spec1(_threadargsproto_);
 /*static int _ode_matsol1(_threadargsproto_);*/
-
-#define _slist1 _slist1_na
-int* _slist1;
-#pragma acc declare create(_slist1)
-
-#define _dlist1 _dlist1_na
-int* _dlist1;
-#pragma acc declare create(_dlist1)
  static inline int state(_threadargsproto_);

 /*CVODE*/
@@ -315,7 +326,7 @@ double _v, v; int* _ni; int _iml, _cntml_padded, _cntml_actual;
 _cntml_actual = _ml->_nodecount;
 _cntml_padded = _ml->_nodecount_padded;
 _thread = _ml->_thread;
-_acc_globals_update();
+_update_global_variables();
 double * _nt_data = _nt->_data;
 double * _vec_v = _nt->_actual_v;
 int stream_id = _nt->stream_id;
@@ -460,20 +471,15 @@ for (;;) { /* help clang-format properly indent */
 static void terminal(){}

 static void _initlists(){
+ _global_variables_ptr = &_global_variables;
  double _x; double* _p = &_x;
  int _i; static int _first = 1;
  int _cntml_actual=1;
  int _cntml_padded=1;
  int _iml=0;
   if (!_first) return;
-
- _slist1 = (int*)malloc(sizeof(int)*2);
- _dlist1 = (int*)malloc(sizeof(int)*2);
  _slist1[0] = &(nai) - _p;  _dlist1[0] = &(Dnai) - _p;
  _slist1[1] = &(nao) - _p;  _dlist1[1] = &(Dnao) - _p;
- #pragma acc enter data copyin(_slist1[0:2])
- #pragma acc enter data copyin(_dlist1[0:2])
-
 _first = 0;
 }
 } // namespace coreneuron_lib
diff --git a/mod/vecevent.cpp b/mod/vecevent.cpp
index 98cc8a4..e2e3383 100644
--- a/mod/vecevent.cpp
+++ b/mod/vecevent.cpp
@@ -181,10 +181,6 @@
 #endif /*BBCORE*/
  /* declare global and static user variables */

-static void _acc_globals_update() {
- }
-
-
 #if 0 /*BBCORE*/
  /* some parameters have upper and lower limits */
  static HocParmLimits _hoc_parm_limits[] = {
@@ -272,6 +268,21 @@ static void nrn_alloc(double* _p, Datum* _ppvar, int _type) {
  set_pnt_receive(_mechtype, _net_receive, nullptr, 1);
  	hoc_register_var(hoc_scdoub, hoc_vdoub, NULL);
  }
+ struct _GlobalVars {
+   int _mechtype;
+ };
+
+ static _GlobalVars _global_variables;
+ static _GlobalVars* _global_variables_ptr;
+
+
+static void _update_global_variables() {
+   _global_variables._mechtype = _mechtype;
+   #pragma acc enter data copyin(_global_variables[0:1]) if(nrn_threads->compute_gpu)
+ }
+
+ #define _mechtype _global_variables_ptr->_mechtype
+
 static const char *modelname = "";

 static int error;
@@ -492,7 +503,7 @@ double _v, v; int* _ni; int _iml, _cntml_padded, _cntml_actual;
 _cntml_actual = _ml->_nodecount;
 _cntml_padded = _ml->_nodecount_padded;
 _thread = _ml->_thread;
-_acc_globals_update();
+_update_global_variables();
 double * _nt_data = _nt->_data;
 double * _vec_v = _nt->_actual_v;
 int stream_id = _nt->stream_id;
@@ -565,6 +576,7 @@ for (;;) { /* help clang-format properly indent */
 static void terminal(){}

 static void _initlists(){
+ _global_variables_ptr = &_global_variables;
  double _x; double* _p = &_x;
  int _i; static int _first = 1;
  int _cntml_actual=1;

pramodk added a commit to BlueBrain/CoreNeuron that referenced this pull request Apr 3, 2022
* mod2c now generates code without need of global variables
* coreneuron and mechanism library can be built as shared and it
  enables launching coreneuron on GPU via pyton
* scopmath library can be also shared
* removed acc/openmp global annotations for celsius, pi and secondorder
  and they don't need to be copied on GPU

- [x] MOD2C generates code without using globals / acc declare
      See see BlueBrain/mod2c/pull/78
- [x] Basic test with special and python on GPU
      See #141 (comment)
- [ ] Link issues with CUDA part e.g. nrnran123.cu functions result
      into link errors, see
      #141 (comment)
      @olupton to rescue!
- [ ] Check celsius usage within coreneuron source code
- [ ] Investigate why acc_deviceptr(ml->data) returns host
      pointer when coreneuron is launched via python. See
      #141 (comment)
- [ ] Run neuron test suite and external models like olfactory-buld via
      python
- [ ] Update submodule BlueBrain/mod2c/pull/78
Copy link
Contributor

@nrnhines nrnhines left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like a sound way to encapsulate. Later can deal with EXTERNAL translation which is very rare (maybe nmodl will have superceded mod2c so won't have to do it for mod2c). This encapsulation may be a milestone on the (future) road toward multiple instances of a model.
Anyway, if this allows dynamic loading to work for gpu, then well worth it!

@nrnhines
Copy link
Contributor

nrnhines commented Apr 3, 2022

passing on CPU which means there are "obvious" issues

Did you mean no obvious issues... Or
Does this mean there is no companion NEURON PR that uses this mod2c PR to run dynamic loading GPU tests with the NEURON CI?

@pramodk
Copy link
Collaborator Author

pramodk commented Apr 4, 2022

Did you mean no obvious issues...

My bad! Yes, I meant there are NO obvious issues.

pramodk added a commit to BlueBrain/CoreNeuron that referenced this pull request Apr 21, 2022
* mod2c now generates code without need of global variables
* coreneuron and mechanism library can be built as shared and it
  enables launching coreneuron on GPU via pyton
* scopmath library can be also shared
* removed acc/openmp global annotations for celsius, pi and secondorder
  and they don't need to be copied on GPU

- [x] MOD2C generates code without using globals / acc declare
      See see BlueBrain/mod2c/pull/78
- [x] Basic test with special and python on GPU
      See #141 (comment)
- [ ] Link issues with CUDA part e.g. nrnran123.cu functions result
      into link errors, see
      #141 (comment)
      @olupton to rescue!
- [ ] Check celsius usage within coreneuron source code
- [ ] Investigate why acc_deviceptr(ml->data) returns host
      pointer when coreneuron is launched via python. See
      #141 (comment)
- [ ] Run neuron test suite and external models like olfactory-buld via
      python
- [ ] Update submodule BlueBrain/mod2c/pull/78
pramodk and others added 9 commits April 25, 2022 16:38
* all global variables are now wrapped into
  struct called _GlobalVars.
* previous global variables are still printed
  but their values is now copied into _GlobalVars
  during nrn_init.
* variables like slist and dlist are now removed
* we now only copy _GlobalVars into GPU; all other
  acc copyin / update for global variables is now
  removed.
* we now print all global variables as static and
  remove all their macro renaming.

Todos:

- [x] basic implementation / draft removing all global usage
- [x] run neuron test suite on CPU
- [ ] run neuron test suite on GPU
- [ ] fix TODOs
- [ ] code cleanup including memory cleanup

Related to BlueBrain/CoreNeuron/issues/141
 * use ml->instance to store global variables
 * pass ml as an extra parameter everywhere
 * remove slist/dlist from present clauses
 * initlist is now called from nrn_init
 * TODOs: quite some todos that needs to be
   resolved.
@olupton olupton force-pushed the pramodk/exclude-global-vars branch from 6b59295 to 52deed9 Compare April 26, 2022 07:29
olupton pushed a commit to BlueBrain/CoreNeuron that referenced this pull request Apr 26, 2022
* mod2c now generates code without need of global variables
* coreneuron and mechanism library can be built as shared and it
  enables launching coreneuron on GPU via pyton
* scopmath library can be also shared
* removed acc/openmp global annotations for celsius, pi and secondorder
  and they don't need to be copied on GPU

- [x] MOD2C generates code without using globals / acc declare
      See see BlueBrain/mod2c/pull/78
- [x] Basic test with special and python on GPU
      See #141 (comment)
- [ ] Link issues with CUDA part e.g. nrnran123.cu functions result
      into link errors, see
      #141 (comment)
      @olupton to rescue!
- [ ] Check celsius usage within coreneuron source code
- [ ] Investigate why acc_deviceptr(ml->data) returns host
      pointer when coreneuron is launched via python. See
      #141 (comment)
- [ ] Run neuron test suite and external models like olfactory-buld via
      python
- [ ] Update submodule BlueBrain/mod2c/pull/78
Copy link
Collaborator Author

@pramodk pramodk left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@olupton : your changes look fine to me.

I have added some comments for myself as a reminder so don't worry about those immediately.

olupton pushed a commit to BlueBrain/CoreNeuron that referenced this pull request Apr 28, 2022
* mod2c now generates code without need of global variables
* coreneuron and mechanism library can be built as shared and it
  enables launching coreneuron on GPU via pyton
* scopmath library can be also shared
* removed acc/openmp global annotations for celsius, pi and secondorder
  and they don't need to be copied on GPU

- [x] MOD2C generates code without using globals / acc declare
      See see BlueBrain/mod2c/pull/78
- [x] Basic test with special and python on GPU
      See #141 (comment)
- [ ] Link issues with CUDA part e.g. nrnran123.cu functions result
      into link errors, see
      #141 (comment)
      @olupton to rescue!
- [ ] Check celsius usage within coreneuron source code
- [ ] Investigate why acc_deviceptr(ml->data) returns host
      pointer when coreneuron is launched via python. See
      #141 (comment)
- [ ] Run neuron test suite and external models like olfactory-buld via
      python
- [ ] Update submodule BlueBrain/mod2c/pull/78
olupton pushed a commit to BlueBrain/CoreNeuron that referenced this pull request Jul 12, 2022
* mod2c now generates code without need of global variables
* coreneuron and mechanism library can be built as shared and it
  enables launching coreneuron on GPU via pyton
* scopmath library can be also shared
* removed acc/openmp global annotations for celsius, pi and secondorder
  and they don't need to be copied on GPU

- [x] MOD2C generates code without using globals / acc declare
      See see BlueBrain/mod2c/pull/78
- [x] Basic test with special and python on GPU
      See #141 (comment)
- [ ] Link issues with CUDA part e.g. nrnran123.cu functions result
      into link errors, see
      #141 (comment)
      @olupton to rescue!
- [ ] Check celsius usage within coreneuron source code
- [ ] Investigate why acc_deviceptr(ml->data) returns host
      pointer when coreneuron is launched via python. See
      #141 (comment)
- [ ] Run neuron test suite and external models like olfactory-buld via
      python
- [ ] Update submodule BlueBrain/mod2c/pull/78
olupton pushed a commit to BlueBrain/CoreNeuron that referenced this pull request Jul 12, 2022
* mod2c now generates code without need of global variables
* coreneuron and mechanism library can be built as shared and it
  enables launching coreneuron on GPU via pyton
* scopmath library can be also shared
* removed acc/openmp global annotations for celsius, pi and secondorder
  and they don't need to be copied on GPU

- [x] MOD2C generates code without using globals / acc declare
      See see BlueBrain/mod2c/pull/78
- [x] Basic test with special and python on GPU
      See #141 (comment)
- [ ] Link issues with CUDA part e.g. nrnran123.cu functions result
      into link errors, see
      #141 (comment)
      @olupton to rescue!
- [ ] Check celsius usage within coreneuron source code
- [ ] Investigate why acc_deviceptr(ml->data) returns host
      pointer when coreneuron is launched via python. See
      #141 (comment)
- [ ] Run neuron test suite and external models like olfactory-buld via
      python
- [ ] Update submodule BlueBrain/mod2c/pull/78
olupton pushed a commit to BlueBrain/CoreNeuron that referenced this pull request Aug 11, 2022
* mod2c now generates code without need of global variables
* coreneuron and mechanism library can be built as shared and it
  enables launching coreneuron on GPU via pyton
* scopmath library can be also shared
* removed acc/openmp global annotations for celsius, pi and secondorder
  and they don't need to be copied on GPU

- [x] MOD2C generates code without using globals / acc declare
      See see BlueBrain/mod2c/pull/78
- [x] Basic test with special and python on GPU
      See #141 (comment)
- [ ] Link issues with CUDA part e.g. nrnran123.cu functions result
      into link errors, see
      #141 (comment)
      @olupton to rescue!
- [ ] Check celsius usage within coreneuron source code
- [ ] Investigate why acc_deviceptr(ml->data) returns host
      pointer when coreneuron is launched via python. See
      #141 (comment)
- [ ] Run neuron test suite and external models like olfactory-buld via
      python
- [ ] Update submodule BlueBrain/mod2c/pull/78
olupton pushed a commit to BlueBrain/CoreNeuron that referenced this pull request Aug 16, 2022
* mod2c now generates code without need of global variables
* coreneuron and mechanism library can be built as shared and it
  enables launching coreneuron on GPU via pyton
* scopmath library can be also shared
* removed acc/openmp global annotations for celsius, pi and secondorder
  and they don't need to be copied on GPU

- [x] MOD2C generates code without using globals / acc declare
      See see BlueBrain/mod2c/pull/78
- [x] Basic test with special and python on GPU
      See #141 (comment)
- [ ] Link issues with CUDA part e.g. nrnran123.cu functions result
      into link errors, see
      #141 (comment)
      @olupton to rescue!
- [ ] Check celsius usage within coreneuron source code
- [ ] Investigate why acc_deviceptr(ml->data) returns host
      pointer when coreneuron is launched via python. See
      #141 (comment)
- [ ] Run neuron test suite and external models like olfactory-buld via
      python
- [ ] Update submodule BlueBrain/mod2c/pull/78
Copy link
Collaborator Author

@pramodk pramodk left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Skimmed through changes and fixed obvious TODOs.

@olupton : could you see if anything else I should handle/improve? (of course, feel free to push the changes yourself)

Copy link
Collaborator Author

@pramodk pramodk left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM!

(as the author, I can't approve this one)

@pramodk pramodk merged commit 469c74d into master Aug 27, 2022
pramodk added a commit to BlueBrain/CoreNeuron that referenced this pull request Aug 28, 2022
…#795)

* coreneuron and mechanism library can be built as shared and it
  enables launching coreneuron on GPU via python
* update MOD2C and NMODL fixes to handle GLOBAL variables
      See BlueBrain/mod2c/pull/78
      See BlueBrain/nmodl/pull/904
* removed acc/openmp global annotations for celsius, pi and secondorder
  and they don't need to be copied on GPU
* Pass Memb_list* as an argument for all common prototypes in order
   to support global variables via argument
* free ml->instance if not empty
* add link to libscopmath in neuron as well
* nrn_ghk is now declared inline.
* homegrown present table to avoid dynamic loading + acc_deviceptr limitations
* use -gpu=nordc and make #pragma acc routine seq functions inline
* drop -lscopmath as its folded in elsewhere
* random123 header reorganisation
* try and cleanup CLI11 handling.
* try and consolidate build logic
* some CORENEURON_ -> CORENRN_ for consistency.
* export OpenACC flags to NEURON separately as well as part
     of the whole ... -lcoreneuron ... link line.
* libcoreneuron.so -> libcorenrnmech.so, try and fix static builds
* do not enable OpenMP in shared/OpenACC builds.
* add rpaths inside nrnivmodl-core.
* accept a private destructor function pointer from generated mechanisms
* drop ${TEST_EXEC_PREFIX} that was causing simple tests to be executed on many ranks.
* CORENEURON_GPU_DEBUG: add environment variable that enables cnrn_target_* debug messages.

fixes #141

Co-authored-by: Olli Lupton <[email protected]>
pramodk added a commit to neuronsimulator/nrn that referenced this pull request Nov 2, 2022
…BlueBrain/CoreNeuron#795)

* coreneuron and mechanism library can be built as shared and it
  enables launching coreneuron on GPU via python
* update MOD2C and NMODL fixes to handle GLOBAL variables
      See BlueBrain/mod2c/pull/78
      See BlueBrain/nmodl/pull/904
* removed acc/openmp global annotations for celsius, pi and secondorder
  and they don't need to be copied on GPU
* Pass Memb_list* as an argument for all common prototypes in order
   to support global variables via argument
* free ml->instance if not empty
* add link to libscopmath in neuron as well
* nrn_ghk is now declared inline.
* homegrown present table to avoid dynamic loading + acc_deviceptr limitations
* use -gpu=nordc and make #pragma acc routine seq functions inline
* drop -lscopmath as its folded in elsewhere
* random123 header reorganisation
* try and cleanup CLI11 handling.
* try and consolidate build logic
* some CORENEURON_ -> CORENRN_ for consistency.
* export OpenACC flags to NEURON separately as well as part
     of the whole ... -lcoreneuron ... link line.
* libcoreneuron.so -> libcorenrnmech.so, try and fix static builds
* do not enable OpenMP in shared/OpenACC builds.
* add rpaths inside nrnivmodl-core.
* accept a private destructor function pointer from generated mechanisms
* drop ${TEST_EXEC_PREFIX} that was causing simple tests to be executed on many ranks.
* CORENEURON_GPU_DEBUG: add environment variable that enables cnrn_target_* debug messages.

fixes BlueBrain/CoreNeuron#141

Co-authored-by: Olli Lupton <[email protected]>

CoreNEURON Repo SHA: BlueBrain/CoreNeuron@12272f8
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants