-
Notifications
You must be signed in to change notification settings - Fork 4.3k
/
Copy pathHeuristicPolicy.cs
142 lines (123 loc) · 4.15 KB
/
HeuristicPolicy.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
using System.Collections.Generic;
using System;
using System.Collections;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Policies
{
/// <summary>
/// The Heuristic Policy uses a hard-coded Heuristic method
/// to take decisions each time the RequestDecision method is
/// called.
/// </summary>
internal class HeuristicPolicy : IPolicy
{
    ActuatorManager m_ActuatorManager;

    // Allocated once in the constructor and reused by every DecideAction call.
    ActionBuffers m_ActionBuffers;

    // Value of AgentInfo.done captured by the most recent RequestDecision call.
    bool m_Done;

    // Set by RequestDecision, cleared by DecideAction.
    bool m_DecisionRequested;

    // Writer and discard target used by StepSensors.
    ObservationWriter m_ObservationWriter = new ObservationWriter();
    NullList m_NullList = new NullList();

    /// <summary>
    /// Creates the policy and allocates action buffers sized from <paramref name="actionSpec"/>.
    /// </summary>
    /// <param name="actuatorManager">Manager whose ApplyHeuristic method fills the action buffers.</param>
    /// <param name="actionSpec">Supplies the number of continuous and discrete actions.</param>
    public HeuristicPolicy(ActuatorManager actuatorManager, ActionSpec actionSpec)
    {
        m_ActuatorManager = actuatorManager;
        var numContinuousActions = actionSpec.NumContinuousActions;
        var numDiscreteActions = actionSpec.NumDiscreteActions;
        var continuousDecision = new ActionSegment<float>(new float[numContinuousActions], 0, numContinuousActions);
        var discreteDecision = new ActionSegment<int>(new int[numDiscreteActions], 0, numDiscreteActions);
        m_ActionBuffers = new ActionBuffers(continuousDecision, discreteDecision);
    }

    /// <inheritdoc />
    public void RequestDecision(AgentInfo info, List<ISensor> sensors)
    {
        // Sensors are stepped even though their output is discarded (see StepSensors).
        StepSensors(sensors);
        m_Done = info.done;
        m_DecisionRequested = true;
    }

    /// <inheritdoc />
    public ref readonly ActionBuffers DecideAction()
    {
        // Only recompute when a decision was requested and the agent is not done;
        // otherwise the buffers are returned with whatever they already contain.
        if (!m_Done && m_DecisionRequested)
        {
            m_ActionBuffers.Clear();
            m_ActuatorManager.ApplyHeuristic(m_ActionBuffers);
        }
        m_DecisionRequested = false;
        return ref m_ActionBuffers;
    }

    /// <summary>
    /// No-op.
    /// </summary>
    public void Dispose()
    {
    }

    /// <summary>
    /// Trivial implementation of the IList interface that does nothing.
    /// This is only used for "writing" observations that we will discard.
    /// It always reports itself as empty: writes are dropped and reads
    /// return neutral values.
    /// </summary>
    internal class NullList : IList<float>
    {
        /// <summary>
        /// Returns an enumerator over an empty sequence, consistent with
        /// <see cref="Count"/> always being 0.
        /// </summary>
        public IEnumerator<float> GetEnumerator()
        {
            // Nothing is ever stored, so there is nothing to yield.
            yield break;
        }

        IEnumerator IEnumerable.GetEnumerator()
        {
            return GetEnumerator();
        }

        public void Add(float item)
        {
        }

        public void Clear()
        {
        }

        public bool Contains(float item)
        {
            return false;
        }

        /// <summary>
        /// Copies nothing: the list holds no elements, so the destination is left untouched.
        /// </summary>
        public void CopyTo(float[] array, int arrayIndex)
        {
        }

        public bool Remove(float item)
        {
            return false;
        }

        // Never assigned, so these stay at their defaults (0 and false).
        public int Count { get; }
        public bool IsReadOnly { get; }

        public int IndexOf(float item)
        {
            return -1;
        }

        public void Insert(int index, float item)
        {
        }

        public void RemoveAt(int index)
        {
        }

        public float this[int index]
        {
            get { return 0.0f; }
            set { }
        }
    }

    /// <summary>
    /// Run ISensor.Write or ISensor.GetCompressedObservation for each sensor
    /// The output is currently unused, but this makes the sensor usage consistent
    /// between training and inference.
    /// </summary>
    /// <param name="sensors">Sensors to step; each one is either written to a discard target or asked for its compressed observation.</param>
    void StepSensors(List<ISensor> sensors)
    {
        foreach (var sensor in sensors)
        {
            if (sensor.GetCompressionSpec().SensorCompressionType == SensorCompressionType.None)
            {
                // Point the writer at the discard list so the written values go nowhere.
                m_ObservationWriter.SetTarget(m_NullList, sensor.GetObservationSpec(), 0);
                sensor.Write(m_ObservationWriter);
            }
            else
            {
                // Result intentionally ignored.
                sensor.GetCompressedObservation();
            }
        }
    }
}
}