1
+ /* *
2
+ * @file
3
+ *
4
+ * @brief Class encapsulating the executable being disassembled.
5
+ *
6
+ * @copyright Assemblize is free software: you can redistribute it and/or
7
+ * modify it under the terms of the GNU General Public License
8
+ * as published by the Free Software Foundation, either version
9
+ * 3 of the License, or (at your option) any later version.
10
+ * A full copy of the GNU General Public License can be found in
11
+ * LICENSE
12
+ */
1
13
#include " executable.h"
14
+ #include " function.h"
2
15
#include < LIEF/LIEF.hpp>
3
16
#include < fstream>
4
17
#include < iostream>
5
18
#include < nlohmann/json.hpp>
19
+ #include < strings.h>
6
20
7
21
const char unassemblize::Executable::s_symbolSection[] = " symbols" ;
8
-
9
- unassemblize::Executable::Executable (const char *file_name, bool verbose) :
10
- m_binary(LIEF::Parser::parse(file_name)), m_endAddress(0 ), m_verbose(verbose)
22
// Keys that load_config() and save_config() look up at the top level of the JSON config file for section info, general settings and target objects.
+ const char unassemblize::Executable::s_sectionsSection[] = " sections" ;
23
+ const char unassemblize::Executable::s_configSection[] = " config" ;
24
+ const char unassemblize::Executable::s_objectSection[] = " objects" ;
25
+
26
+ unassemblize::Executable::Executable (const char *file_name, OutputFormats format, bool verbose) :
27
+ m_binary(LIEF::Parser::parse(file_name)),
28
+ m_endAddress(0 ),
29
+ m_outputFormat(format),
30
+ m_codeAlignment(sizeof (uint32_t )),
31
+ m_dataAlignment(sizeof (uint32_t )),
32
+ m_codePad(0x90 ), // NOP
33
+ m_dataPad(0x00 ),
34
+ m_verbose(verbose)
11
35
{
12
36
if (m_verbose) {
13
37
printf (" Loading section info...\n " );
@@ -30,6 +54,14 @@ unassemblize::Executable::Executable(const char *file_name, bool verbose) :
30
54
if (section.address + section.size > m_endAddress) {
31
55
m_endAddress = section.address + section.size ;
32
56
}
57
+
58
+ // Naive split on whether section contains data or code... have entrypoint? Code, else data.
59
+ // Needs to be refined by providing a config file with section types specified.
60
+ if (section.address <= m_binary->entrypoint () && section.address + section.size >= m_binary->entrypoint ()) {
61
+ section.type = SECTION_CODE;
62
+ } else {
63
+ section.type = SECTION_DATA;
64
+ }
33
65
}
34
66
}
35
67
@@ -44,6 +76,15 @@ unassemblize::Executable::Executable(const char *file_name, bool verbose) :
44
76
m_symbolMap.insert ({it->value (), Symbol (it->name (), it->value (), it->size ())});
45
77
}
46
78
}
79
+
80
+ auto exe_imports = m_binary->imported_functions ();
81
+
82
+ for (auto it = exe_imports.begin (); it != exe_imports.end (); ++it) {
83
+ if (it->value () != 0 && !it->name ().empty () && m_symbolMap.find (it->value ()) == m_symbolMap.end ()) {
84
+ m_loadedSymbols.push_back (it->name ());
85
+ m_symbolMap.insert ({it->value (), Symbol (m_loadedSymbols.back (), it->value (), it->size ())});
86
+ }
87
+ }
47
88
}
48
89
49
90
const uint8_t *unassemblize::Executable::section_data (const char *name) const
@@ -70,6 +111,19 @@ uint64_t unassemblize::Executable::base_address() const
70
111
}
71
112
72
113
/**
 * @brief Looks up the symbol stored in the symbol map under exactly the given address.
 *
 * @param addr Address used as the lookup key.
 * @return Reference to the stored symbol, or to a function-local static symbol
 *         built from an empty name, value 0 and size 0 when the map has no
 *         entry for @p addr.
 */
const unassemblize::Executable::Symbol &unassemblize::Executable::get_symbol(uint64_t addr) const
{
    // Statics keep the placeholder alive so a reference to it can be returned.
    static std::string no_name;
    static Symbol fallback(no_name, 0, 0);

    const auto found = m_symbolMap.find(addr);

    if (found == m_symbolMap.end()) {
        return fallback;
    }

    return found->second;
}
125
+
126
+ const unassemblize::Executable::Symbol &unassemblize::Executable::get_nearest_symbol (uint64_t addr) const
73
127
{
74
128
static std::string empty;
75
129
static Symbol def (empty, 0 , 0 );
@@ -86,18 +140,103 @@ const unassemblize::Executable::Symbol &unassemblize::Executable::get_symbol(uin
86
140
return def;
87
141
}
88
142
89
- void unassemblize::Executable::load_symbols (const char *file_name)
143
+ void unassemblize::Executable::add_symbol (const char *sym, uint64_t addr)
144
+ {
145
+ if (m_symbolMap.find (addr) == m_symbolMap.end ()) {
146
+ m_loadedSymbols.push_back (sym);
147
+ m_symbolMap.insert ({addr, Symbol (m_loadedSymbols.back (), addr, 0 )});
148
+ }
149
+ }
150
+
151
+ void unassemblize::Executable::load_config (const char *file_name)
90
152
{
153
+ if (m_verbose) {
154
+ printf (" Loading config file '%s'...\n " , file_name);
155
+ }
156
+
91
157
std::ifstream fs (file_name);
92
158
93
159
if (!fs.good ()) {
94
160
return ;
95
161
}
96
162
97
163
nlohmann::json j = nlohmann::json::parse (fs);
98
- auto &jsyms = j.at (s_symbolSection);
99
164
100
- for (auto it = jsyms.begin (); it != jsyms.end (); ++it) {
165
+ if (j.find (s_configSection) != j.end ()) {
166
+ nlohmann::json &conf = j.at (s_configSection);
167
+ conf.at (" codealign" ).get_to (m_codeAlignment);
168
+ conf.at (" dataalign" ).get_to (m_dataAlignment);
169
+ conf.at (" codepadding" ).get_to (m_codePad);
170
+ conf.at (" datapadding" ).get_to (m_dataPad);
171
+ }
172
+
173
+ if (j.find (s_symbolSection) != j.end ()) {
174
+ load_symbols (j.at (s_symbolSection));
175
+ }
176
+
177
+ if (j.find (s_sectionsSection) != j.end ()) {
178
+ load_sections (j.at (s_sectionsSection));
179
+ }
180
+
181
+ if (j.find (s_objectSection) != j.end ()) {
182
+ load_objects (j.at (s_objectSection));
183
+ }
184
+ }
185
+
186
+ void unassemblize::Executable::save_config (const char *file_name)
187
+ {
188
+ if (m_verbose) {
189
+ printf (" Saving config file '%s'...\n " , file_name);
190
+ }
191
+
192
+ nlohmann::json j;
193
+
194
+ // Parse the config file if it already exists and update it.
195
+ {
196
+ std::ifstream fs (file_name);
197
+
198
+ if (fs.good ()) {
199
+ j = nlohmann::json::parse (fs);
200
+ }
201
+ }
202
+
203
+ if (j.find (s_configSection) == j.end ()) {
204
+ j[s_configSection] = nlohmann::json ();
205
+ }
206
+
207
+ nlohmann::json &conf = j.at (s_configSection);
208
+ conf[" codealign" ] = m_codeAlignment;
209
+ conf[" dataalign" ] = m_dataAlignment;
210
+ conf[" codepadding" ] = m_codePad;
211
+ conf[" datapadding" ] = m_dataPad;
212
+
213
+ // Don't dump if we already have a sections for these.
214
+ if (j.find (s_symbolSection) == j.end ()) {
215
+ j[s_symbolSection] = nlohmann::json ();
216
+ dump_symbols (j.at (s_symbolSection));
217
+ }
218
+
219
+ if (j.find (s_sectionsSection) == j.end ()) {
220
+ j[s_sectionsSection] = nlohmann::json ();
221
+ dump_sections (j.at (s_sectionsSection));
222
+ }
223
+
224
+ if (j.find (s_objectSection) == j.end ()) {
225
+ j[s_objectSection] = nlohmann::json ();
226
+ dump_objects (j.at (s_objectSection));
227
+ }
228
+
229
+ std::ofstream fs (file_name);
230
+ fs << std::setw (4 ) << j << std::endl;
231
+ }
232
+
233
+ void unassemblize::Executable::load_symbols (nlohmann::json &js)
234
+ {
235
+ if (m_verbose) {
236
+ printf (" Loading external symbols...\n " );
237
+ }
238
+
239
+ for (auto it = js.begin (); it != js.end (); ++it) {
101
240
std::string name;
102
241
it->at (" name" ).get_to (name);
103
242
@@ -126,35 +265,159 @@ void unassemblize::Executable::load_symbols(const char *file_name)
126
265
}
127
266
}
128
267
129
- void unassemblize::Executable::dump_symbols (const char *file_name )
268
+ void unassemblize::Executable::dump_symbols (nlohmann::json &js )
130
269
{
131
- auto exe_syms = m_binary->symbols ();
132
- nlohmann::json j;
270
+ if (m_verbose) {
271
+ printf (" Saving symbols...\n " );
272
+ }
133
273
134
- {
135
- std::ifstream fs (file_name);
274
+ for (auto it = m_symbolMap.begin (); it != m_symbolMap.end (); ++it) {
275
+ js.push_back ({{" name" , it->second .name }, {" address" , it->second .value }, {" size" , it->second .size }});
276
+ }
277
+ }
136
278
137
- if (fs.good ()) {
138
- j = nlohmann::json::parse (fs);
279
+ void unassemblize::Executable::load_sections (nlohmann::json &js)
280
+ {
281
+ if (m_verbose) {
282
+ printf (" Loading section info...\n " );
283
+ }
284
+
285
+ for (auto it = js.begin (); it != js.end (); ++it) {
286
+ std::string name;
287
+ it->at (" name" ).get_to (name);
288
+
289
+ // Don't try and load an empty symbol.
290
+ if (!name.empty ()) {
291
+ auto section = m_sections.find (name);
292
+
293
+ if (section == m_sections.end () && m_verbose) {
294
+ printf (" Tried to load section info for section not present in this binary!\n " );
295
+ printf (" Section '%s' info was ignored.\n " , name.c_str ());
296
+ }
297
+
298
+ std::string type;
299
+ it->at (" type" ).get_to (type);
300
+
301
+ if (strcasecmp (type.c_str (), " code" ) == 0 ) {
302
+ section->second .type = SECTION_CODE;
303
+ } else if (strcasecmp (type.c_str (), " data" ) == 0 ) {
304
+ section->second .type = SECTION_DATA;
305
+ } else if (m_verbose) {
306
+ printf (" Incorrect type specified for section '%s'.\n " , name.c_str ());
307
+ }
139
308
}
140
309
}
310
+ }
141
311
312
+ void unassemblize::Executable::dump_sections (nlohmann::json &js)
313
+ {
142
314
if (m_verbose) {
143
- printf (" Dumping symbols to file '%s' ...\n " , file_name );
315
+ printf (" Saving section info ...\n " );
144
316
}
145
317
146
- // Remove symbols if it already exists and repopulate it.
147
- if (j.find (s_symbolSection) != j.end ()) {
148
- j.erase (s_symbolSection);
318
+ for (auto it = m_sections.begin (); it != m_sections.end (); ++it) {
319
+ js.push_back ({{" name" , it->first }, {" type" , it->second .type == SECTION_CODE ? " code" : " data" }});
149
320
}
321
+ }
150
322
151
- j[s_symbolSection] = nlohmann::json ();
152
- auto &syms = j.at (s_symbolSection);
323
+ void unassemblize::Executable::load_objects (nlohmann::json &js)
324
+ {
325
+ if (m_verbose) {
326
+ printf (" Loading objects...\n " );
327
+ }
153
328
154
- for (auto it = exe_syms.begin (); it != exe_syms.end (); ++it) {
155
- syms.push_back ({{" name" , it->name ()}, {" address" , it->value ()}, {" size" , it->size ()}});
329
+ for (auto it = js.begin (); it != js.end (); ++it) {
330
+ std::string obj_name;
331
+ it->at (" name" ).get_to (obj_name);
332
+
333
+ if (obj_name.empty ()) {
334
+ continue ;
335
+ }
336
+
337
+ m_targetObjects.push_back ({obj_name, std::list<ObjectSection>()});
338
+ auto &obj = m_targetObjects.back ();
339
+ auto §ions = js.back ().at (" sections" );
340
+
341
+ for (auto sec = sections.begin (); sec != sections.end (); ++sec) {
342
+ std::string name;
343
+ uint64_t start;
344
+ uint64_t size;
345
+ sec->at (" name" ).get_to (name);
346
+ sec->at (" start" ).get_to (start);
347
+ sec->at (" size" ).get_to (size);
348
+ obj.sections .push_back ({name, start, size});
349
+ }
156
350
}
351
+ }
157
352
158
- std::ofstream fs (file_name);
159
- fs << std::setw (4 ) << j << std::endl;
353
+ void unassemblize::Executable::dump_objects (nlohmann::json &js)
354
+ {
355
+ if (m_verbose) {
356
+ printf (" Saving objects...\n " );
357
+ }
358
+
359
+ if (m_targetObjects.empty ()) {
360
+ m_targetObjects.push_back (
361
+ {m_binary->name ().substr (m_binary->name ().find_last_of (" /\\ " ) + 1 ), std::list<ObjectSection>()});
362
+ auto &obj = m_targetObjects.back ();
363
+
364
+ for (auto it = m_binary->sections ().begin (); it != m_binary->sections ().end (); ++it) {
365
+ if (it->name ().empty () || it->size () == 0 ) {
366
+ continue ;
367
+ }
368
+
369
+ obj.sections .push_back ({it->name (), 0 , it->size ()});
370
+ }
371
+ }
372
+
373
+ for (auto it = m_targetObjects.begin (); it != m_targetObjects.end (); ++it) {
374
+ js.push_back ({{" name" , it->name }, {" sections" , nlohmann::json ()}});
375
+ auto §ions = js.back ().at (" sections" );
376
+
377
+ for (auto it2 = it->sections .begin (); it2 != it->sections .end (); ++it2) {
378
+ sections.push_back ({{" name" , it2->name }, {" start" , it2->start }, {" size" , it2->size }});
379
+ }
380
+ }
381
+ }
382
+
383
+ void unassemblize::Executable::dissassemble_function (
384
+ FILE *output, const char *section_name, uint64_t start, uint64_t end)
385
+ {
386
+ // Abort if we can't output anywhere.
387
+ if (output == nullptr ) {
388
+ return ;
389
+ }
390
+
391
+ if (m_outputFormat != OUTPUT_MASM) {
392
+ dissassemble_gas_func (output, section_name, start, end);
393
+ }
394
+ }
395
+
396
+ void unassemblize::Executable::dissassemble_gas_func (
397
+ FILE *output, const char *section_name, uint64_t start, uint64_t end)
398
+ {
399
+ if (start != 0 && end != 0 ) {
400
+ unassemblize::Function func (*this , section_name, start, end);
401
+ if (m_outputFormat == OUTPUT_IGAS) {
402
+ func.disassemble (Function::FORMAT_IGAS);
403
+ } else {
404
+ func.disassemble (Function::FORMAT_AGAS);
405
+ }
406
+
407
+ const std::string &sym = get_symbol (start).name ;
408
+
409
+ if (!sym.empty ()) {
410
+ fprintf (output,
411
+ " .globl %s\n %s:\n %s" ,
412
+ sym.c_str (),
413
+ sym.c_str (),
414
+ func.dissassembly ().c_str ());
415
+ } else {
416
+ fprintf (output,
417
+ " .globl sub_%" PRIx64 " \n sub_%" PRIx64 " :\n %s" ,
418
+ start,
419
+ start,
420
+ func.dissassembly ().c_str ());
421
+ }
422
+ }
160
423
}
0 commit comments