Skip to content

Commit a0d858d

Browse files
committed
Refactor towards full binary disassembly.
1 parent a108fe9 commit a0d858d

File tree

5 files changed

+511
-107
lines changed

5 files changed

+511
-107
lines changed

executable.cpp

+286-23
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,37 @@
1+
/**
2+
* @file
3+
*
4+
* @brief Class encapsulating the executable being disassembled.
5+
*
6+
* @copyright Assemblize is free software: you can redistribute it and/or
7+
* modify it under the terms of the GNU General Public License
8+
* as published by the Free Software Foundation, either version
9+
* 3 of the License, or (at your option) any later version.
10+
* A full copy of the GNU General Public License can be found in
11+
* LICENSE
12+
*/
113
#include "executable.h"
14+
#include "function.h"
215
#include <LIEF/LIEF.hpp>
316
#include <fstream>
417
#include <iostream>
518
#include <nlohmann/json.hpp>
19+
#include <strings.h>
620

721
const char unassemblize::Executable::s_symbolSection[] = "symbols";
8-
9-
unassemblize::Executable::Executable(const char *file_name, bool verbose) :
10-
m_binary(LIEF::Parser::parse(file_name)), m_endAddress(0), m_verbose(verbose)
22+
const char unassemblize::Executable::s_sectionsSection[] = "sections";
23+
const char unassemblize::Executable::s_configSection[] = "config";
24+
const char unassemblize::Executable::s_objectSection[] = "objects";
25+
26+
unassemblize::Executable::Executable(const char *file_name, OutputFormats format, bool verbose) :
27+
m_binary(LIEF::Parser::parse(file_name)),
28+
m_endAddress(0),
29+
m_outputFormat(format),
30+
m_codeAlignment(sizeof(uint32_t)),
31+
m_dataAlignment(sizeof(uint32_t)),
32+
m_codePad(0x90), // NOP
33+
m_dataPad(0x00),
34+
m_verbose(verbose)
1135
{
1236
if (m_verbose) {
1337
printf("Loading section info...\n");
@@ -30,6 +54,14 @@ unassemblize::Executable::Executable(const char *file_name, bool verbose) :
3054
if (section.address + section.size > m_endAddress) {
3155
m_endAddress = section.address + section.size;
3256
}
57+
58+
// Naive split on whether section contains data or code... have entrypoint? Code, else data.
59+
// Needs to be refined by providing a config file with section types specified.
60+
if (section.address <= m_binary->entrypoint() && section.address + section.size >= m_binary->entrypoint()) {
61+
section.type = SECTION_CODE;
62+
} else {
63+
section.type = SECTION_DATA;
64+
}
3365
}
3466
}
3567

@@ -44,6 +76,15 @@ unassemblize::Executable::Executable(const char *file_name, bool verbose) :
4476
m_symbolMap.insert({it->value(), Symbol(it->name(), it->value(), it->size())});
4577
}
4678
}
79+
80+
auto exe_imports = m_binary->imported_functions();
81+
82+
for (auto it = exe_imports.begin(); it != exe_imports.end(); ++it) {
83+
if (it->value() != 0 && !it->name().empty() && m_symbolMap.find(it->value()) == m_symbolMap.end()) {
84+
m_loadedSymbols.push_back(it->name());
85+
m_symbolMap.insert({it->value(), Symbol(m_loadedSymbols.back(), it->value(), it->size())});
86+
}
87+
}
4788
}
4889

4990
const uint8_t *unassemblize::Executable::section_data(const char *name) const
@@ -70,6 +111,19 @@ uint64_t unassemblize::Executable::base_address() const
70111
}
71112

72113
/**
 * Looks up the symbol registered at exactly the given address.
 *
 * @param addr Address used as the key into the symbol map.
 * @return The matching symbol, or a shared placeholder symbol with an empty
 *         name, address 0 and size 0 when nothing is registered at addr.
 */
const unassemblize::Executable::Symbol &unassemblize::Executable::get_symbol(uint64_t addr) const
{
    // Function-local statics so a reference to the placeholder stays valid after returning.
    static std::string no_name;
    static Symbol placeholder(no_name, 0, 0);

    auto found = m_symbolMap.find(addr);

    if (found == m_symbolMap.end()) {
        return placeholder;
    }

    return found->second;
}
125+
126+
const unassemblize::Executable::Symbol &unassemblize::Executable::get_nearest_symbol(uint64_t addr) const
73127
{
74128
static std::string empty;
75129
static Symbol def(empty, 0, 0);
@@ -86,18 +140,103 @@ const unassemblize::Executable::Symbol &unassemblize::Executable::get_symbol(uin
86140
return def;
87141
}
88142

89-
void unassemblize::Executable::load_symbols(const char *file_name)
143+
void unassemblize::Executable::add_symbol(const char *sym, uint64_t addr)
144+
{
145+
if (m_symbolMap.find(addr) == m_symbolMap.end()) {
146+
m_loadedSymbols.push_back(sym);
147+
m_symbolMap.insert({addr, Symbol(m_loadedSymbols.back(), addr, 0)});
148+
}
149+
}
150+
151+
void unassemblize::Executable::load_config(const char *file_name)
90152
{
153+
if (m_verbose) {
154+
printf("Loading config file '%s'...\n", file_name);
155+
}
156+
91157
std::ifstream fs(file_name);
92158

93159
if (!fs.good()) {
94160
return;
95161
}
96162

97163
nlohmann::json j = nlohmann::json::parse(fs);
98-
auto &jsyms = j.at(s_symbolSection);
99164

100-
for (auto it = jsyms.begin(); it != jsyms.end(); ++it) {
165+
if (j.find(s_configSection) != j.end()) {
166+
nlohmann::json &conf = j.at(s_configSection);
167+
conf.at("codealign").get_to(m_codeAlignment);
168+
conf.at("dataalign").get_to(m_dataAlignment);
169+
conf.at("codepadding").get_to(m_codePad);
170+
conf.at("datapadding").get_to(m_dataPad);
171+
}
172+
173+
if (j.find(s_symbolSection) != j.end()) {
174+
load_symbols(j.at(s_symbolSection));
175+
}
176+
177+
if (j.find(s_sectionsSection) != j.end()) {
178+
load_sections(j.at(s_sectionsSection));
179+
}
180+
181+
if (j.find(s_objectSection) != j.end()) {
182+
load_objects(j.at(s_objectSection));
183+
}
184+
}
185+
186+
void unassemblize::Executable::save_config(const char *file_name)
187+
{
188+
if (m_verbose) {
189+
printf("Saving config file '%s'...\n", file_name);
190+
}
191+
192+
nlohmann::json j;
193+
194+
// Parse the config file if it already exists and update it.
195+
{
196+
std::ifstream fs(file_name);
197+
198+
if (fs.good()) {
199+
j = nlohmann::json::parse(fs);
200+
}
201+
}
202+
203+
if (j.find(s_configSection) == j.end()) {
204+
j[s_configSection] = nlohmann::json();
205+
}
206+
207+
nlohmann::json &conf = j.at(s_configSection);
208+
conf["codealign"] = m_codeAlignment;
209+
conf["dataalign"] = m_dataAlignment;
210+
conf["codepadding"] = m_codePad;
211+
conf["datapadding"] = m_dataPad;
212+
213+
// Don't dump if we already have a sections for these.
214+
if (j.find(s_symbolSection) == j.end()) {
215+
j[s_symbolSection] = nlohmann::json();
216+
dump_symbols(j.at(s_symbolSection));
217+
}
218+
219+
if (j.find(s_sectionsSection) == j.end()) {
220+
j[s_sectionsSection] = nlohmann::json();
221+
dump_sections(j.at(s_sectionsSection));
222+
}
223+
224+
if (j.find(s_objectSection) == j.end()) {
225+
j[s_objectSection] = nlohmann::json();
226+
dump_objects(j.at(s_objectSection));
227+
}
228+
229+
std::ofstream fs(file_name);
230+
fs << std::setw(4) << j << std::endl;
231+
}
232+
233+
void unassemblize::Executable::load_symbols(nlohmann::json &js)
234+
{
235+
if (m_verbose) {
236+
printf("Loading external symbols...\n");
237+
}
238+
239+
for (auto it = js.begin(); it != js.end(); ++it) {
101240
std::string name;
102241
it->at("name").get_to(name);
103242

@@ -126,35 +265,159 @@ void unassemblize::Executable::load_symbols(const char *file_name)
126265
}
127266
}
128267

129-
void unassemblize::Executable::dump_symbols(const char *file_name)
268+
/**
 * Appends one JSON object per known symbol to js.
 *
 * Each object carries the "name", "address" and "size" of a symbol map entry.
 *
 * @param js JSON value the symbol entries are pushed onto.
 */
void unassemblize::Executable::dump_symbols(nlohmann::json &js)
{
    if (m_verbose) {
        printf("Saving symbols...\n");
    }

    for (const auto &entry : m_symbolMap) {
        const auto &symbol = entry.second;
        js.push_back({{"name", symbol.name}, {"address", symbol.value}, {"size", symbol.size}});
    }
}
136278

137-
if (fs.good()) {
138-
j = nlohmann::json::parse(fs);
279+
/**
 * Applies section type overrides read from the config file.
 *
 * Every entry needs a "name" and, when the section is known, a "type" of either
 * "code" or "data" (compared case-insensitively). Entries with an empty name,
 * entries naming a section that is not present in this binary, and entries
 * with an unrecognised type leave the section table untouched.
 *
 * @param js JSON array of section descriptions.
 */
void unassemblize::Executable::load_sections(nlohmann::json &js)
{
    if (m_verbose) {
        printf("Loading section info...\n");
    }

    for (auto it = js.begin(); it != js.end(); ++it) {
        std::string name;
        it->at("name").get_to(name);

        // Don't try and load a section with an empty name.
        if (name.empty()) {
            continue;
        }

        auto section = m_sections.find(name);

        // An unknown section must be skipped regardless of verbosity; falling
        // through would dereference the end iterator below.
        if (section == m_sections.end()) {
            if (m_verbose) {
                printf("Tried to load section info for section not present in this binary!\n");
                printf("Section '%s' info was ignored.\n", name.c_str());
            }

            continue;
        }

        std::string type;
        it->at("type").get_to(type);

        if (strcasecmp(type.c_str(), "code") == 0) {
            section->second.type = SECTION_CODE;
        } else if (strcasecmp(type.c_str(), "data") == 0) {
            section->second.type = SECTION_DATA;
        } else if (m_verbose) {
            printf("Incorrect type specified for section '%s'.\n", name.c_str());
        }
    }
}
141311

312+
/**
 * Appends one JSON object per known section to js.
 *
 * Each object holds the section "name" and a "type" of "code" for
 * SECTION_CODE sections or "data" for everything else.
 *
 * @param js JSON value the section entries are pushed onto.
 */
void unassemblize::Executable::dump_sections(nlohmann::json &js)
{
    if (m_verbose) {
        printf("Saving section info...\n");
    }

    for (const auto &entry : m_sections) {
        const char *type_name = (entry.second.type == SECTION_CODE) ? "code" : "data";
        js.push_back({{"name", entry.first}, {"type", type_name}});
    }
}
150322

151-
j[s_symbolSection] = nlohmann::json();
152-
auto &syms = j.at(s_symbolSection);
323+
/**
 * Loads the list of target objects and their section ranges from the config.
 *
 * Every entry needs a "name" and a "sections" array whose elements provide
 * "name", "start" and "size". Entries with an empty name are skipped.
 *
 * @param js JSON array of object descriptions.
 */
void unassemblize::Executable::load_objects(nlohmann::json &js)
{
    if (m_verbose) {
        printf("Loading objects...\n");
    }

    for (auto it = js.begin(); it != js.end(); ++it) {
        std::string obj_name;
        it->at("name").get_to(obj_name);

        if (obj_name.empty()) {
            continue;
        }

        m_targetObjects.push_back({obj_name, std::list<ObjectSection>()});
        auto &obj = m_targetObjects.back();

        // Read the sections of the entry being iterated. Using js.back() here
        // would give every object the sections of the last entry in the array.
        auto &sections = it->at("sections");

        for (auto sec = sections.begin(); sec != sections.end(); ++sec) {
            std::string name;
            uint64_t start = 0;
            uint64_t size = 0;
            sec->at("name").get_to(name);
            sec->at("start").get_to(start);
            sec->at("size").get_to(size);
            obj.sections.push_back({name, start, size});
        }
    }
}
157352

158-
std::ofstream fs(file_name);
159-
fs << std::setw(4) << j << std::endl;
353+
/**
 * Appends one JSON object per target object to js.
 *
 * When no target objects are known yet, a single default object is created
 * first. It is named after the binary's file name with any leading directory
 * components removed, and it lists every section of the binary that has a
 * non-empty name and a non-zero size, with a start of 0.
 *
 * @param js JSON value the object entries are pushed onto.
 */
void unassemblize::Executable::dump_objects(nlohmann::json &js)
{
    if (m_verbose) {
        printf("Saving objects...\n");
    }

    if (m_targetObjects.empty()) {
        // find_last_of returns npos when there is no separator; npos + 1 wraps
        // to 0, so the whole name is kept in that case.
        const std::string binary_name = m_binary->name();
        m_targetObjects.push_back(
            {binary_name.substr(binary_name.find_last_of("/\\") + 1), std::list<ObjectSection>()});
        auto &obj = m_targetObjects.back();

        // Fetch the section range once so begin() and end() come from the same
        // range object and it is not rebuilt on every iteration.
        auto exe_sections = m_binary->sections();

        for (auto it = exe_sections.begin(); it != exe_sections.end(); ++it) {
            if (it->name().empty() || it->size() == 0) {
                continue;
            }

            obj.sections.push_back({it->name(), 0, it->size()});
        }
    }

    for (auto it = m_targetObjects.begin(); it != m_targetObjects.end(); ++it) {
        js.push_back({{"name", it->name}, {"sections", nlohmann::json()}});
        auto &sections = js.back().at("sections");

        for (auto it2 = it->sections.begin(); it2 != it->sections.end(); ++it2) {
            sections.push_back({{"name", it2->name}, {"start", it2->start}, {"size", it2->size}});
        }
    }
}
382+
383+
void unassemblize::Executable::dissassemble_function(
384+
FILE *output, const char *section_name, uint64_t start, uint64_t end)
385+
{
386+
// Abort if we can't output anywhere.
387+
if (output == nullptr) {
388+
return;
389+
}
390+
391+
if (m_outputFormat != OUTPUT_MASM) {
392+
dissassemble_gas_func(output, section_name, start, end);
393+
}
394+
}
395+
396+
void unassemblize::Executable::dissassemble_gas_func(
397+
FILE *output, const char *section_name, uint64_t start, uint64_t end)
398+
{
399+
if (start != 0 && end != 0) {
400+
unassemblize::Function func(*this, section_name, start, end);
401+
if (m_outputFormat == OUTPUT_IGAS) {
402+
func.disassemble(Function::FORMAT_IGAS);
403+
} else {
404+
func.disassemble(Function::FORMAT_AGAS);
405+
}
406+
407+
const std::string &sym = get_symbol(start).name;
408+
409+
if (!sym.empty()) {
410+
fprintf(output,
411+
".globl %s\n%s:\n%s",
412+
sym.c_str(),
413+
sym.c_str(),
414+
func.dissassembly().c_str());
415+
} else {
416+
fprintf(output,
417+
".globl sub_%" PRIx64 "\nsub_%" PRIx64 ":\n%s",
418+
start,
419+
start,
420+
func.dissassembly().c_str());
421+
}
422+
}
160423
}

0 commit comments

Comments
 (0)