2
2
using System . Threading . Tasks ;
3
3
using System . IO ;
4
4
using AdvUtils ;
5
+ using System . Numerics ;
5
6
6
7
/// <summary>
7
8
/// RNNSharp written by Zhongkai Fu ([email protected] )
@@ -31,9 +32,13 @@ public class SimpleRNN : RNN
31
32
protected Matrix < double > Feature2HiddenWeights { get ; set ; }
32
33
33
34
// Per-weight learning-rate state, one matrix per weight matrix.
// In the vectorized updates of learnBptt (bptt and dense-feature weights) each
// entry accumulates the square of the clipped gradient, and the step size used
// for that weight is LearningRate / (1 + sqrt(entry)).
// Rows of these matrices are loaded directly into Vector<double> there.
// NOTE(review): the sparse input->hidden update is not visible in this diff;
// presumably Input2HiddenWeightsLearningRate follows the same scheme via
// UpdateLearningRate - confirm.
34
- protected Matrix < float > HiddenBpttWeightsLearningRate { get ; set ; }
35
- protected Matrix < float > Input2HiddenWeightsLearningRate { get ; set ; }
36
- protected Matrix < float > Feature2HiddenWeightsLearningRate { get ; set ; }
35
+ protected Matrix < double > HiddenBpttWeightsLearningRate { get ; set ; }
36
+ protected Matrix < double > Input2HiddenWeightsLearningRate { get ; set ; }
37
+ protected Matrix < double > Feature2HiddenWeightsLearningRate { get ; set ; }
38
+
39
// Broadcast vectors for the SIMD weight updates in learnBptt. Each one is built
// in CleanStatus from a single scalar, so every lane holds the same value:
//   vecMaxGrad            = GradientCutoff  (upper clamp, used with Vector.Min)
//   vecMinGrad            = -GradientCutoff (lower clamp, used with Vector.Max)
//   vecNormalLearningRate = LearningRate    (numerator of the per-weight step size)
+ protected Vector < double > vecMaxGrad ;
40
+ protected Vector < double > vecMinGrad ;
41
+ protected Vector < double > vecNormalLearningRate ;
37
42
38
43
public SimpleRNN ( )
39
44
{
@@ -257,28 +262,55 @@ private void learnBptt(State state)
257
262
//dense weight update fea->0
258
263
double [ ] vector_a = null ;
259
264
double er = neuHidden . er [ a ] ;
265
+ Vector < double > vecErr = new Vector < double > ( er ) ;
266
+
267
+ int i = 0 ;
260
268
if ( DenseFeatureSize > 0 )
261
269
{
262
270
vector_a = mat_bptt_synf [ a ] ;
263
- for ( int i = 0 ; i < DenseFeatureSize ; i ++ )
271
+ i = 0 ;
272
+ while ( i < DenseFeatureSize - Vector < double > . Count )
273
+ {
274
+ Vector < double > v1 = new Vector < double > ( bptt_fea_step , i ) ;
275
+ Vector < double > v2 = new Vector < double > ( vector_a , i ) ;
276
+ v2 += vecErr * v1 ;
277
+ v2 . CopyTo ( vector_a , i ) ;
278
+
279
+ i += Vector < double > . Count ;
280
+ }
281
+
282
+ while ( i < DenseFeatureSize )
264
283
{
265
284
vector_a [ i ] += er * bptt_fea_step [ i ] ;
285
+ i ++ ;
266
286
}
267
287
}
268
288
269
289
//sparse weight update hidden->input
270
290
vector_a = mat_bptt_syn0_w [ a ] ;
271
- for ( int i = 0 ; i < sparse . Count ; i ++ )
291
+ for ( i = 0 ; i < sparse . Count ; i ++ )
272
292
{
273
293
var entry = sparse . GetEntry ( i ) ;
274
294
vector_a [ entry . Key ] += er * entry . Value ;
275
295
}
276
296
277
297
//bptt weight update
278
298
vector_a = mat_bptt_syn0_ph [ a ] ;
279
- for ( int i = 0 ; i < L1 ; i ++ )
299
+ i = 0 ;
300
+ while ( i < L1 - Vector < double > . Count )
301
+ {
302
+ Vector < double > v1 = new Vector < double > ( neuLastHidden . cellOutput , i ) ;
303
+ Vector < double > v2 = new Vector < double > ( vector_a , i ) ;
304
+ v2 += vecErr * v1 ;
305
+ v2 . CopyTo ( vector_a , i ) ;
306
+
307
+ i += Vector < double > . Count ;
308
+ }
309
+
310
+ while ( i < L1 )
280
311
{
281
312
vector_a [ i ] += er * neuLastHidden . cellOutput [ i ] ;
313
+ i ++ ;
282
314
}
283
315
284
316
} ) ;
@@ -308,33 +340,85 @@ private void learnBptt(State state)
308
340
{
309
341
double [ ] vector_b = null ;
310
342
double [ ] vector_bf = null ;
343
+ double [ ] vector_lr = null ;
311
344
312
345
//Update bptt feature weights
313
346
vector_b = HiddenBpttWeights [ b ] ;
314
347
vector_bf = mat_bptt_syn0_ph [ b ] ;
315
- for ( int i = 0 ; i < L1 ; i ++ )
348
+ vector_lr = HiddenBpttWeightsLearningRate [ b ] ;
349
+
350
+ int i = 0 ;
351
+ while ( i < L1 - Vector < double > . Count )
352
+ {
353
+ Vector < double > vecDelta = new Vector < double > ( vector_bf , i ) ;
354
+ Vector < double > vecLearningRate = new Vector < double > ( vector_lr , i ) ;
355
+ Vector < double > vecB = new Vector < double > ( vector_b , i ) ;
356
+ vecDelta = Vector . Min < double > ( vecDelta , vecMaxGrad ) ;
357
+ vecDelta = Vector . Max < double > ( vecDelta , vecMinGrad ) ;
358
+
359
+ vecLearningRate += ( vecDelta * vecDelta ) ;
360
+ vecLearningRate . CopyTo ( vector_lr , i ) ;
361
+ vecLearningRate = vecNormalLearningRate / ( Vector < double > . One + Vector . SquareRoot < double > ( vecLearningRate ) ) ;
362
+
363
+ vecB += ( vecLearningRate * vecDelta ) ;
364
+ vecB . CopyTo ( vector_b , i ) ;
365
+
366
+ Vector < double > . Zero . CopyTo ( vector_bf , i ) ;
367
+
368
+ i += Vector < double > . Count ;
369
+ }
370
+
371
+ while ( i < L1 )
316
372
{
317
373
double delta = NormalizeGradient ( vector_bf [ i ] ) ;
318
374
double newLearningRate = UpdateLearningRate ( HiddenBpttWeightsLearningRate , b , i , delta ) ;
319
375
320
376
vector_b [ i ] += newLearningRate * delta ;
321
377
//Clean bptt weight error
322
378
vector_bf [ i ] = 0 ;
379
+
380
+ i ++ ;
323
381
}
324
382
325
383
//Update dense feature weights
326
384
if ( DenseFeatureSize > 0 )
327
385
{
328
386
vector_b = Feature2HiddenWeights [ b ] ;
329
387
vector_bf = mat_bptt_synf [ b ] ;
330
- for ( int i = 0 ; i < DenseFeatureSize ; i ++ )
388
+ vector_lr = Feature2HiddenWeightsLearningRate [ b ] ;
389
+
390
+ i = 0 ;
391
+ while ( i < DenseFeatureSize - Vector < double > . Count )
392
+ {
393
+ Vector < double > vecDelta = new Vector < double > ( vector_bf , i ) ;
394
+ Vector < double > vecLearningRate = new Vector < double > ( vector_lr , i ) ;
395
+ Vector < double > vecB = new Vector < double > ( vector_b , i ) ;
396
+ vecDelta = Vector . Min < double > ( vecDelta , vecMaxGrad ) ;
397
+ vecDelta = Vector . Max < double > ( vecDelta , vecMinGrad ) ;
398
+
399
+ vecLearningRate += ( vecDelta * vecDelta ) ;
400
+ vecLearningRate . CopyTo ( vector_lr , i ) ;
401
+ vecLearningRate = vecNormalLearningRate / ( Vector < double > . One + Vector . SquareRoot < double > ( vecLearningRate ) ) ;
402
+
403
+ vecB += ( vecLearningRate * vecDelta ) ;
404
+ vecB . CopyTo ( vector_b , i ) ;
405
+
406
+ vecDelta = Vector < double > . Zero ;
407
+ vecDelta . CopyTo ( vector_bf , i ) ;
408
+
409
+ i += Vector < double > . Count ;
410
+ }
411
+
412
+ while ( i < DenseFeatureSize )
331
413
{
332
414
double delta = NormalizeGradient ( vector_bf [ i ] ) ;
333
415
double newLearningRate = UpdateLearningRate ( Feature2HiddenWeightsLearningRate , b , i , delta ) ;
334
416
335
417
vector_b [ i ] += newLearningRate * delta ;
336
418
//Clean dense feature weights error
337
419
vector_bf [ i ] = 0 ;
420
+
421
+ i ++ ;
338
422
}
339
423
}
340
424
@@ -347,7 +431,7 @@ private void learnBptt(State state)
347
431
if ( sparse == null )
348
432
break ;
349
433
350
- for ( int i = 0 ; i < sparse . Count ; i ++ )
434
+ for ( i = 0 ; i < sparse . Count ; i ++ )
351
435
{
352
436
int pos = sparse . GetEntry ( i ) . Key ;
353
437
@@ -387,10 +471,14 @@ public void resetBpttMem()
387
471
388
472
/// <summary>
/// Replaces the per-weight learning-rate matrices with newly constructed ones and
/// rebuilds the broadcast vectors used by the vectorized updates in learnBptt.
/// </summary>
public override void CleanStatus ( )
389
473
{
390
- Hidden2OutputWeightLearningRate = new Matrix < float > ( L2 , L1 ) ;
391
- Input2HiddenWeightsLearningRate = new Matrix < float > ( L1 , L0 ) ;
392
- Feature2HiddenWeightsLearningRate = new Matrix < float > ( L1 , DenseFeatureSize ) ;
393
- HiddenBpttWeightsLearningRate = new Matrix < float > ( L1 , L1 ) ;
474
// One learning-rate entry per weight; each matrix is constructed with the
// two dimensions shown in its constructor call.
// NOTE(review): presumably a new Matrix starts zero-filled, which would reset
// the accumulated learning-rate state - confirm against Matrix<T>.
+ Hidden2OutputWeightLearningRate = new Matrix < double > ( L2 , L1 ) ;
475
+ Input2HiddenWeightsLearningRate = new Matrix < double > ( L1 , L0 ) ;
476
+ Feature2HiddenWeightsLearningRate = new Matrix < double > ( L1 , DenseFeatureSize ) ;
477
+ HiddenBpttWeightsLearningRate = new Matrix < double > ( L1 , L1 ) ;
478
+
479
// The single-value Vector<double> constructor fills every lane with that value.
// These give learnBptt ready-made clamp bounds (+/- GradientCutoff) and the
// base learning rate for its SIMD loops.
// NOTE(review): in the visible code these fields are assigned only here, so
// CleanStatus presumably must run before learnBptt reads them - confirm.
+ vecMaxGrad = new Vector < double > ( GradientCutoff ) ;
480
+ vecMinGrad = new Vector < double > ( - GradientCutoff ) ;
481
+ vecNormalLearningRate = new Vector < double > ( LearningRate ) ;
394
482
}
395
483
public override void InitMem ( )
396
484
{
0 commit comments