Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion emsymbolizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ def __init__(self, source=None, line=0, column=0, func=None):
def __init__(self):
self.version = None
self.sources = []
self.funcs = []
self.mappings = {}
self.offsets = []

Expand All @@ -121,6 +122,7 @@ def parse(self, filename):

self.version = source_map_json['version']
self.sources = source_map_json['sources']
self.funcs = source_map_json['names']

chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='
vlq_map = {c: i for i, c in enumerate(chars)}
Expand Down Expand Up @@ -148,6 +150,7 @@ def decodeVLQ(string):
src = 0
line = 1
col = 1
func = 0
for segment in source_map_json['mappings'].split(','):
data = decodeVLQ(segment)
info = []
Expand All @@ -162,7 +165,9 @@ def decodeVLQ(string):
if len(data) >= 4:
col += data[3]
info.append(col)
# TODO: see if we need the name, which is the next field (data[4])
if len(data) == 5:
func += data[4]
info.append(func)

self.mappings[offset] = WasmSourceMap.Location(*info)
self.offsets.append(offset)
Expand All @@ -189,6 +194,7 @@ def lookup(self, offset):
self.sources[info.source] if info.source is not None else None,
info.line,
info.column,
self.funcs[info.func] if info.func is not None else None,
)


Expand Down
21 changes: 14 additions & 7 deletions test/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -10938,10 +10938,11 @@ def check_dwarf_loc_info(address, funcs, locs):
for loc in locs:
self.assertIn(loc, out)

def check_source_map_loc_info(address, loc):
def check_source_map_loc_info(address, func, loc):
    """Symbolize `address` via the source map and check the reported info.

    Runs emsymbolizer in 'sourcemap' mode against test_dwarf.wasm and asserts
    that both the expected function name and the expected source location
    appear in its standard output.
    """
    cmd = [emsymbolizer, '-s', 'sourcemap', 'test_dwarf.wasm', address]
    out = self.run_process(cmd, stdout=PIPE).stdout
    # Function name is checked first, then the source location.
    for expected in (func, loc):
        self.assertIn(expected, out)

# We test two locations within test_dwarf.c:
Expand All @@ -10968,22 +10969,28 @@ def check_source_map_loc_info(address, loc):

# 1. Test DWARF + source map together
# For DWARF, we check for the full inlined info for both function names and
# source locations. Source maps provide neither function names nor inlined
# info. So we only check for the source location of the outermost function.
# source locations. Source maps do not provide inlined info. So we only
# check for the info of the outermost function.
check_dwarf_loc_info(out_to_js_call_addr, out_to_js_call_func,
out_to_js_call_loc)
check_source_map_loc_info(out_to_js_call_addr, out_to_js_call_loc[0])
check_source_map_loc_info(out_to_js_call_addr, out_to_js_call_func[0],
out_to_js_call_loc[0])
check_dwarf_loc_info(unreachable_addr, unreachable_func, unreachable_loc)
check_source_map_loc_info(unreachable_addr, unreachable_loc[0])
# Source map shows the original (inlined) source location with the function
# name that was inlined into
check_source_map_loc_info(unreachable_addr, unreachable_func[1],
unreachable_loc[0])

# 2. Test source map only
# The addresses, function names, and source locations are the same across
# the builds because they are relative offsets from the code section, so we
# don't need to recompute them
self.run_process([EMCC, test_file('core/test_dwarf.c'),
'-gsource-map', '-O1', '-o', 'test_dwarf.js'])
check_source_map_loc_info(out_to_js_call_addr, out_to_js_call_loc[0])
check_source_map_loc_info(unreachable_addr, unreachable_loc[0])
check_source_map_loc_info(out_to_js_call_addr, out_to_js_call_func[0],
out_to_js_call_loc[0])
check_source_map_loc_info(unreachable_addr, unreachable_func[1],
unreachable_loc[0])

# 3. Test DWARF only
self.run_process([EMCC, test_file('core/test_dwarf.c'),
Expand Down
55 changes: 51 additions & 4 deletions tools/wasm-sourcemap.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"""

import argparse
import bisect
import json
import logging
from math import floor, log
Expand All @@ -27,6 +28,7 @@
from tools import utils
from tools.system_libs import DETERMINISTIC_PREFIX
from tools.shared import path_from_root
from tools import webassembly

EMSCRIPTEN_PREFIX = utils.normalize_path(path_from_root())

Expand Down Expand Up @@ -300,19 +302,57 @@ def read_dwarf_entries(wasm, options):
return sorted(entries, key=lambda entry: entry['address'])


def build_sourcemap(entries, code_section_offset, options):
def read_func_ranges(wasm_input):
    """Collect (name, (low_pc, high_pc)) pairs for the functions in a wasm file.

    Returns an empty list when the module has no name section. Otherwise each
    entry of module.get_functions() is paired with its name (names of imported
    functions are dropped from the front of the name list) and with the
    half-open range [offset, offset + size) computed from the function's
    `offset` and `size` attributes.
    """
    with webassembly.Module(wasm_input) as module:
        if not module.has_name_section():
            return []

        defined_funcs = module.get_functions()
        all_names = module.get_function_names()
        # Skip the imported functions' names so that the remaining names line
        # up one-to-one with the entries of get_functions().
        defined_names = all_names[module.num_imported_funcs():]
        assert len(defined_funcs) == len(defined_names)

        # Replace '__original_main' with 'main'
        if '__original_main' in defined_names:
            defined_names[defined_names.index('__original_main')] = 'main'

        ranges = []
        for func_name, func in zip(defined_names, defined_funcs):
            low_pc = func.offset
            ranges.append((func_name, (low_pc, low_pc + func.size)))
        return ranges


def build_sourcemap(entries, func_ranges, code_section_offset, options):
base_path = options.basepath
collect_sources = options.sources
prefixes = SourceMapPrefixes(options.prefix, options.load_prefix, base_path)

func_low_pcs = [item[1][0] for item in func_ranges]

sources = []
sources_content = []
names = [item[0] for item in func_ranges]
mappings = []
sources_map = {}
last_address = 0
last_source_id = 0
last_line = 1
last_column = 1
last_func_id = 0

# Get the function ID that the given address falls into
def get_function_id(address):
    """Return the index into func_ranges of the function containing `address`.

    Returns None when there are no function ranges, when the address lies
    before the first function's start, or when it falls in a gap between
    functions.
    """
    if not func_ranges:
        return None
    # bisect_right gives the count of functions whose low_pc is <= address,
    # so the entry just before that position is the only possible match.
    candidate = bisect.bisect_right(func_low_pcs, address) - 1
    if candidate < 0:  # The address is lower than the first function's start
        return None
    _, (start, end) = func_ranges[candidate]
    # Ranges are half-open: [low_pc, high_pc).
    return candidate if start <= address < end else None

for entry in entries:
line = entry['line']
Expand Down Expand Up @@ -343,21 +383,27 @@ def build_sourcemap(entries, code_section_offset, options):
sources_content.append(None)
else:
source_id = sources_map[source_name]
func_id = get_function_id(address)

address_delta = address - last_address
source_id_delta = source_id - last_source_id
line_delta = line - last_line
column_delta = column - last_column
mappings.append(encode_vlq(address_delta) + encode_vlq(source_id_delta) + encode_vlq(line_delta) + encode_vlq(column_delta))
last_address = address
last_source_id = source_id
last_line = line
last_column = column
mapping = encode_vlq(address_delta) + encode_vlq(source_id_delta) + encode_vlq(line_delta) + encode_vlq(column_delta)
if func_id is not None:
func_id_delta = func_id - last_func_id
last_func_id = func_id
mapping += encode_vlq(func_id_delta)
mappings.append(mapping)

return {'version': 3,
'sources': sources,
'sourcesContent': sources_content,
'names': [],
'names': names,
'mappings': ','.join(mappings)}


Expand All @@ -369,11 +415,12 @@ def main():
wasm = infile.read()

entries = read_dwarf_entries(wasm_input, options)
func_ranges = read_func_ranges(wasm_input)

code_section_offset = get_code_section_offset(wasm)

logger.debug('Saving to %s' % options.output)
map = build_sourcemap(entries, code_section_offset, options)
map = build_sourcemap(entries, func_ranges, code_section_offset, options)
with open(options.output, 'w', encoding='utf-8') as outfile:
json.dump(map, outfile, separators=(',', ':'), ensure_ascii=False)

Expand Down
28 changes: 28 additions & 0 deletions tools/webassembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,34 @@ def get_function_types(self):
def has_name_section(self):
    """Return True if the module contains a custom section called 'name'."""
    name_section = self.get_custom_section('name')
    return name_section is not None

@memoize
def get_function_names(self):
    """Return the function names recorded in the 'name' custom section.

    The result has one slot per imported function plus one per entry of
    get_functions(), indexed by the function index read from the section.
    Slots with no recorded name stay None; if the module has no 'name'
    section every slot is None.
    """
    function_names_subsection = 1

    total_funcs = self.num_imported_funcs() + len(self.get_functions())
    result = [None for _ in range(total_funcs)]

    section = self.get_custom_section('name')
    if not section:
        return result

    self.seek(section.offset)
    self.read_string()  # section name
    end_of_section = section.offset + section.size

    while self.tell() < end_of_section:
        kind = self.read_byte()
        payload_size = self.read_uleb()
        if kind != function_names_subsection:
            # Not the function-names subsection: jump over its payload.
            self.skip(payload_size)
            continue
        for _ in range(self.read_uleb()):
            index = self.read_uleb()
            name = self.read_string()
            assert index < len(result)
            result[index] = name

    return result

@once
def _calc_indexes(self):
self.imports_by_kind = {}
Expand Down