#undef asm_volatile_goto
#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto")

#include "bpfdefs.h"

#ifdef TESTING_COREDUMP
// defined in include/uapi/linux/perf_event.h
#define PERF_MAX_STACK_DEPTH 127
#ifndef THREAD_SIZE
// taken from the kernel sources
#define THREAD_SIZE 16384
#endif
#else
#include
#include
#include
#include
#endif

#include "frametypes.h"
#include "types.h"
#include "tracemgmt.h"
#include "stackdeltatypes.h"

// Macro to create a map named exe_id_to_X_stack_deltas: a nested map with a fileID as the
// outer key, and an array as the inner map holding up to 2^X stack delta entries for that fileID.
#define STACK_DELTA_BUCKET(X)                              \
  bpf_map_def SEC("maps") exe_id_to_##X##_stack_deltas = { \
    .type = BPF_MAP_TYPE_HASH_OF_MAPS,                     \
    .key_size = sizeof(u64),                               \
    .value_size = sizeof(u32),                             \
    .max_entries = 4096,                                   \
  };

// Create buckets to hold the stack delta information for the executables.
STACK_DELTA_BUCKET(8);
STACK_DELTA_BUCKET(9);
STACK_DELTA_BUCKET(10);
STACK_DELTA_BUCKET(11);
STACK_DELTA_BUCKET(12);
STACK_DELTA_BUCKET(13);
STACK_DELTA_BUCKET(14);
STACK_DELTA_BUCKET(15);
STACK_DELTA_BUCKET(16);
STACK_DELTA_BUCKET(17);
STACK_DELTA_BUCKET(18);
STACK_DELTA_BUCKET(19);
STACK_DELTA_BUCKET(20);
STACK_DELTA_BUCKET(21);

// Unwind info values for invalid and stop stack deltas
#define STACK_DELTA_INVALID (STACK_DELTA_COMMAND_FLAG | UNWIND_COMMAND_INVALID)
#define STACK_DELTA_STOP    (STACK_DELTA_COMMAND_FLAG | UNWIND_COMMAND_STOP)

// An array of unwind info contains all the different UnwindInfo instances
// needed system wide. Individual stack delta entries refer to this array.
bpf_map_def SEC("maps") unwind_info_array = {
  .type = BPF_MAP_TYPE_ARRAY,
  .key_size = sizeof(u32),
  .value_size = sizeof(UnwindInfo),
  // Maximum number of unique stack deltas needed on a system. This is based on
  // a normal desktop's /usr/bin/* and /usr/lib/*.so having about 9700 unique deltas.
  // Can be increased up to 2^15, see also STACK_DELTA_COMMAND_FLAG.
  .max_entries = 16384,
};

// The number of native frames to unwind per frame-unwinding eBPF program.
#define NATIVE_FRAMES_PER_PROGRAM 4

// The decision whether to unwind native stacks or interpreter stacks is made by checking if a
// given PC address falls into the "interpreter loop" of an interpreter. This map helps identify
// such loops: the keys are those executable section IDs that contain interpreter loops, the
// values identify the offset range within the executable section that contains the loop.
bpf_map_def SEC("maps") interpreter_offsets = {
  .type = BPF_MAP_TYPE_HASH,
  .key_size = sizeof(u64),
  .value_size = sizeof(OffsetRange),
  .max_entries = 32,
};

// Maps fileID and page to information of stack deltas associated with that page.
bpf_map_def SEC("maps") stack_delta_page_to_info = {
  .type = BPF_MAP_TYPE_HASH,
  .key_size = sizeof(StackDeltaPageKey),
  .value_size = sizeof(StackDeltaPageInfo),
  .max_entries = 40000,
};
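// For orientation, a stack delta lookup for one PC walks these maps in order
// (a sketch of the flow implemented in get_stack_delta below):
//   (fileID, page)     -> stack_delta_page_to_info : StackDeltaPageInfo
//   info.mapID         -> exe_id_to_<mapID>_stack_deltas (outer hash-of-maps)
//   fileID             -> inner array map of StackDelta entries
//   [firstDelta, firstDelta+numDeltas) -> binary search by 16-bit page offset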
// This contains the kernel PCs as returned by bpf_get_stackid(). Unfortunately the ebpf
// program cannot read the contents, so we return the stackid in the Trace directly, and
// make the profiling agent read the kernel mode stack trace portion from this map.
bpf_map_def SEC("maps") kernel_stackmap = {
  .type = BPF_MAP_TYPE_STACK_TRACE,
  .key_size = sizeof(u32),
  .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
  .max_entries = 16*1024,
};

#if defined(__aarch64__)
// This contains the cached size of the pt_regs structure as established by the
// get_arm64_ptregs_size function.
struct bpf_map_def SEC("maps") ptregs_size = {
  .type = BPF_MAP_TYPE_ARRAY,
  .key_size = sizeof(u32),
  .value_size = sizeof(u64),
  .max_entries = 1,
};
#endif

// Record a native frame
static inline __attribute__((__always_inline__))
ErrorCode push_native(Trace *trace, u64 file, u64 line) {
  return _push(trace, file, line, FRAME_MARKER_NATIVE);
}

#ifdef __aarch64__
// Strips the PAC tag from a pointer.
//
// While all pointers can contain PAC tags, we only apply this function to code pointers, because
// that's where normalization is required to make the stack delta lookups work. Note that if that
// should ever change, we'd need a different mask for the data pointers, because it might diverge
// from the mask for code pointers.
static inline u64 normalize_pac_ptr(u64 ptr) {
  // Retrieve PAC mask from the system config.
  u32 key = 0;
  SystemConfig* syscfg = bpf_map_lookup_elem(&system_config, &key);
  if (!syscfg) {
    // Unreachable: array maps are always fully initialized.
    return ptr;
  }

  // Mask off PAC bits. Since we're always applying this to usermode pointers that should have all
  // the high bits set to 0, we don't need to consider the case of having to fill up the resulting
  // hole with 1s (like we'd have to for kernel ptrs).
  ptr &= syscfg->inverse_pac_mask;
  return ptr;
}
#endif
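// Illustrative example (the values are hypothetical; the real mask comes from
// the system config): with inverse_pac_mask = 0x0000ffffffffffff, a PAC-tagged
// user-mode code pointer 0x007d0000aabbcc00 normalizes to 0x00000000aabbcc00,
// the raw address the stack delta maps are keyed on.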
// A single step for the bsearch into the big_stack_deltas array. This is really a textbook bsearch
// step, built in a way to update the value of *lo and *hi. This function will be called repeatedly
// (since we cannot do loops). The return value signals whether the bsearch came to an end / found
// the right element or whether it needs to continue.
static inline __attribute__((__always_inline__))
bool bsearch_step(void* inner_map, u32* lo, u32* hi, u16 page_offset) {
  u32 pivot = (*lo + *hi) >> 1;
  StackDelta *delta = bpf_map_lookup_elem(inner_map, &pivot);
  if (!delta) {
    *hi = 0;
    return false;
  }
  if (page_offset >= delta->addrLow) {
    *lo = pivot + 1;
  } else {
    *hi = pivot;
  }
  return *lo < *hi;
}
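// Worked example of the bisection above: for firstDelta = 100, numDeltas = 4
// and deltas with addrLow {0x10, 0x40, 0x80, 0xc0}, a lookup with
// page_offset 0x90 narrows [lo,hi) as [100,104) -> [103,104) -> [103,103),
// leaving hi = 103, the first entry whose addrLow exceeds 0x90.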
// Get the outer map based on the number of stack delta entries.
static inline __attribute__((__always_inline__))
void *get_stack_delta_map(int mapID) {
  switch (mapID) {
  case  8: return &exe_id_to_8_stack_deltas;
  case  9: return &exe_id_to_9_stack_deltas;
  case 10: return &exe_id_to_10_stack_deltas;
  case 11: return &exe_id_to_11_stack_deltas;
  case 12: return &exe_id_to_12_stack_deltas;
  case 13: return &exe_id_to_13_stack_deltas;
  case 14: return &exe_id_to_14_stack_deltas;
  case 15: return &exe_id_to_15_stack_deltas;
  case 16: return &exe_id_to_16_stack_deltas;
  case 17: return &exe_id_to_17_stack_deltas;
  case 18: return &exe_id_to_18_stack_deltas;
  case 19: return &exe_id_to_19_stack_deltas;
  case 20: return &exe_id_to_20_stack_deltas;
  case 21: return &exe_id_to_21_stack_deltas;
  default: return NULL;
  }
}

// Get the stack offset of the given instruction.
static ErrorCode get_stack_delta(u64 text_section_id, u64 text_section_offset,
                                 int* addrDiff, u32* unwindInfo) {
  u64 exe_id = text_section_id;

  // Look up the stack delta page information for this address.
  StackDeltaPageKey key = { };
  key.fileID = text_section_id;
  key.page = text_section_offset & ~STACK_DELTA_PAGE_MASK;
  DEBUG_PRINT("Look up stack delta for %lx:%lx",
              (unsigned long)text_section_id, (unsigned long)text_section_offset);
  StackDeltaPageInfo *info = bpf_map_lookup_elem(&stack_delta_page_to_info, &key);
  if (!info) {
    DEBUG_PRINT("Failure to look up stack delta page fileID %lx, page %lx",
                (unsigned long)key.fileID, (unsigned long)key.page);
    increment_metric(metricID_UnwindNativeErrLookupTextSection);
    return ERR_NATIVE_LOOKUP_TEXT_SECTION;
  }

  void *outer_map = get_stack_delta_map(info->mapID);
  if (!outer_map) {
    DEBUG_PRINT("Failure to look up outer map for text section %lx in mapID %d",
                (unsigned long) exe_id, (int) info->mapID);
    increment_metric(metricID_UnwindNativeErrLookupStackDeltaOuterMap);
    return ERR_NATIVE_LOOKUP_STACK_DELTA_OUTER_MAP;
  }

  void *inner_map = bpf_map_lookup_elem(outer_map, &exe_id);
  if (!inner_map) {
    DEBUG_PRINT("Failure to look up inner map for text section %lx",
                (unsigned long) exe_id);
    increment_metric(metricID_UnwindNativeErrLookupStackDeltaInnerMap);
    return ERR_NATIVE_LOOKUP_STACK_DELTA_INNER_MAP;
  }

  // Preinitialize the idx for the index to use for a page without any deltas.
  u32 idx = info->firstDelta;
  u16 page_offset = text_section_offset & STACK_DELTA_PAGE_MASK;
  if (info->numDeltas) {
    // The page has deltas, so find the correct one to use using binary search.
    u32 lo = info->firstDelta;
    u32 hi = lo + info->numDeltas;

    DEBUG_PRINT("Intervals should be from %lu to %lu (mapID %d)",
                (unsigned long) lo, (unsigned long) hi, (int)info->mapID);

    // Do the binary search, up to 16 iterations. Deltas are paged to 64kB pages.
    // They can contain at most 64kB deltas even if everything is single byte opcodes.
    int i;
#pragma unroll
    for (i = 0; i < 16; i++) {
      if (!bsearch_step(inner_map, &lo, &hi, page_offset)) {
        break;
      }
    }
    if (i >= 16 || hi == 0) {
      DEBUG_PRINT("Failed bsearch in 16 steps. Corrupt data?");
      increment_metric(metricID_UnwindNativeErrLookupIterations);
      return ERR_NATIVE_EXCEEDED_DELTA_LOOKUP_ITERATIONS;
    }
    // After bsearch, 'hi' points to the first entry greater than the requested one.
    idx = hi;
  }

  // The code above found the first entry with a greater address than requested,
  // so it needs to be decremented by one to get the entry with equal-or-less.
  // This also makes the logic work across pages: if the first entry within
  // the page is too large, this actually gets the entry from the previous page.
  idx--;

  StackDelta *delta = bpf_map_lookup_elem(inner_map, &idx);
  if (!delta) {
    increment_metric(metricID_UnwindNativeErrLookupRange);
    return ERR_NATIVE_LOOKUP_RANGE;
  }

  DEBUG_PRINT("delta index %d, addrLow 0x%x, unwindInfo %d",
              idx, delta->addrLow, delta->unwindInfo);

  // Calculate the PC delta from the stack delta for merged delta comparison.
  int deltaOffset = (int)page_offset - (int)delta->addrLow;
  if (idx < info->firstDelta) {
    // PC is below the first delta of the corresponding page. This means that
    // delta->addrLow contains an address relative to one page before the page_offset.
    // Fix up the deltaOffset with this difference of base pages.
    deltaOffset += 1 << STACK_DELTA_PAGE_BITS;
  }
  *addrDiff = deltaOffset;
  *unwindInfo = delta->unwindInfo;

  if (delta->unwindInfo == STACK_DELTA_INVALID) {
    increment_metric(metricID_UnwindNativeErrStackDeltaInvalid);
    return ERR_NATIVE_STACK_DELTA_INVALID;
  }
  if (delta->unwindInfo == STACK_DELTA_STOP) {
    increment_metric(metricID_UnwindNativeStackDeltaStop);
  }

  return ERR_OK;
}
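// Worked example for the lookup above (assuming the 64kB pages stated in the
// comments, i.e. STACK_DELTA_PAGE_MASK = 0xffff): text_section_offset 0x12345
// selects page 0x10000 and page_offset 0x2345. If the chosen delta turns out
// to be the last one of the previous page (idx < firstDelta), deltaOffset is
// corrected by +0x10000 so that merged-delta comparisons still see the true
// PC distance from the delta's start address.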
// unwind_register_address calculates the given expression ('opcode'/'param') to get
// the CFA (canonical frame address, to recover PC and be used in further calculations),
// or the address where a register is stored (FP currently), so that the value of
// the register can be recovered.
//
// Currently the following expressions are supported:
//   1. Not recoverable -> NULL is returned.
//   2. When UNWIND_OPCODEF_DEREF is not set:
//      BASE + param
//   3. When UNWIND_OPCODEF_DEREF is set:
//      *(BASE + preDeref) + postDeref
static inline __attribute__((__always_inline__))
u64 unwind_register_address(UnwindState *state, u64 cfa, u8 opcode, s32 param) {
  unsigned long addr, val;
  s32 preDeref = param, postDeref = 0;

  if (opcode & UNWIND_OPCODEF_DEREF) {
    // For expressions that dereference the base expression, the parameter is constructed
    // of pre-dereference and post-dereference operands. Unpack those.
    preDeref &= ~UNWIND_DEREF_MASK;
    postDeref = (param & UNWIND_DEREF_MASK) * UNWIND_DEREF_MULTIPLIER;
  }

  // Resolve the 'BASE' register, and fetch the CFA/FP/SP value.
  switch (opcode & ~UNWIND_OPCODEF_DEREF) {
  case UNWIND_OPCODE_BASE_CFA:
    addr = cfa;
    break;
  case UNWIND_OPCODE_BASE_FP:
    addr = state->fp;
    break;
  case UNWIND_OPCODE_BASE_SP:
    addr = state->sp;
    break;
#if defined(__aarch64__)
  case UNWIND_OPCODE_BASE_LR:
    DEBUG_PRINT("unwind: lr");
    if (state->lr == 0) {
      increment_metric(metricID_UnwindNativeLr0);
      DEBUG_PRINT("Failure to unwind frame: zero LR at %llx", state->pc);
      return 0;
    }
    return state->lr;
#endif
  default:
    return 0;
  }

#ifdef OPTI_DEBUG
  switch (opcode) {
  case UNWIND_OPCODE_BASE_CFA:
    DEBUG_PRINT("unwind: cfa+%d", preDeref);
    break;
  case UNWIND_OPCODE_BASE_FP:
    DEBUG_PRINT("unwind: fp+%d", preDeref);
    break;
  case UNWIND_OPCODE_BASE_SP:
    DEBUG_PRINT("unwind: sp+%d", preDeref);
    break;
  case UNWIND_OPCODE_BASE_CFA | UNWIND_OPCODEF_DEREF:
    DEBUG_PRINT("unwind: *(cfa+%d)+%d", preDeref, postDeref);
    break;
  case UNWIND_OPCODE_BASE_FP | UNWIND_OPCODEF_DEREF:
    DEBUG_PRINT("unwind: *(fp+%d)+%d", preDeref, postDeref);
    break;
  case UNWIND_OPCODE_BASE_SP | UNWIND_OPCODEF_DEREF:
    DEBUG_PRINT("unwind: *(sp+%d)+%d", preDeref, postDeref);
    break;
  }
#endif

  // Adjust based on the parameter / preDereference adder.
  addr += preDeref;
  if ((opcode & UNWIND_OPCODEF_DEREF) == 0) {
    // All done: return "BASE + param"
    return addr;
  }

  // Dereference, and add the postDereference adder.
  if (bpf_probe_read(&val, sizeof(val), (void*) addr)) {
    DEBUG_PRINT("unwind failed to dereference address 0x%lx", addr);
    return 0;
  }
  // Return: "*(BASE + preDeref) + postDeref"
  return val + postDeref;
}

// Stack unwinding in the absence of frame pointers can be a bit involved, so
// this comment explains what the following code does.
//
// One begins unwinding a frame somewhere in the middle of execution.
// On x86_64, registers RIP (PC), RSP (SP), and RBP (FP) are available.
//
// This function resolves a "stack delta" command from our internal maps. This
// stack delta refers to a rule on how to unwind the state. In the simple case
// it just provides the SP delta and potentially the offset from where to
// recover the FP value. See unwind_register_address() for the expressions
// supported.
//
// The function sets the bool pointed to by the given `stop` pointer to `true`
// if the main ebpf unwinder should exit. This is the case if the current PC
// is marked with UNWIND_COMMAND_STOP, which marks entry points (main function,
// thread spawn function, signal handlers, ...).
#if defined(__x86_64__)
static ErrorCode unwind_one_frame(u64 pid, u32 frame_idx, UnwindState *state, bool* stop) {
  *stop = false;

  u32 unwindInfo = 0;
  u64 rt_regs[18];
  int addrDiff = 0;
  u64 cfa = 0;

  // The relevant executable is compiled with frame pointer omission, so
  // stack deltas need to be retrieved from the relevant map.
  ErrorCode error = get_stack_delta(state->text_section_id, state->text_section_offset,
                                    &addrDiff, &unwindInfo);
  if (error) {
    return error;
  }

  if (unwindInfo & STACK_DELTA_COMMAND_FLAG) {
    switch (unwindInfo & ~STACK_DELTA_COMMAND_FLAG) {
    case UNWIND_COMMAND_PLT:
      // The toolchains routinely emit a fixed DWARF expression to unwind the full
      // PLT table with one expression to reduce .eh_frame size.
      // This is the hard coded implementation of this expression. For further details,
      // see https://hal.inria.fr/hal-02297690/document, page 4. (DOI: 10.1145/3360572)
      cfa = state->sp + 8 + ((((state->pc & 15) >= 11) ? 1 : 0) << 3);
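      // The expression above evaluates to sp + 8 when (pc & 15) < 11 and to
      // sp + 16 when (pc & 15) >= 11: the CFA depends only on the PC's position
      // within the 16-byte PLT entry, which is what lets one rule cover the
      // whole PLT.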
      DEBUG_PRINT("PLT, cfa = 0x%lx", (unsigned long)cfa);
      break;
    case UNWIND_COMMAND_SIGNAL:
      // The rt_sigframe is defined at:
      // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/include/asm/sigframe.h?h=v6.4#n59
      // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/include/uapi/asm/sigcontext.h?h=v6.4#n238
      // offsetof(struct rt_sigframe, uc.uc_mcontext) = 40
      if (bpf_probe_read(&rt_regs, sizeof(rt_regs), (void*)(state->sp + 40))) {
        goto err_native_pc_read;
      }
      state->r13 = rt_regs[5];
      state->fp = rt_regs[10];
      state->sp = rt_regs[15];
      state->pc = rt_regs[16];
      goto frame_ok;
    case UNWIND_COMMAND_STOP:
      *stop = true;
      return ERR_OK;
    default:
      return ERR_UNREACHABLE;
    }
  } else {
    UnwindInfo *info = bpf_map_lookup_elem(&unwind_info_array, &unwindInfo);
    if (!info) {
      increment_metric(metricID_UnwindNativeErrBadUnwindInfoIndex);
      return ERR_NATIVE_BAD_UNWIND_INFO_INDEX;
    }

    s32 param = info->param;
    if (info->mergeOpcode) {
      DEBUG_PRINT("AddrDiff %d, merged delta %#02x", addrDiff, info->mergeOpcode);
      if (addrDiff >= (info->mergeOpcode & ~MERGEOPCODE_NEGATIVE)) {
        param += (info->mergeOpcode & MERGEOPCODE_NEGATIVE) ? -8 : 8;
        DEBUG_PRINT("Merged delta match: cfaDelta=%d", unwindInfo);
      }
    }

    // Resolve the frame's CFA (previous PC is fixed to CFA) address, and
    // the previous FP address if any.
    cfa = unwind_register_address(state, 0, info->opcode, param);
    u64 fpa = unwind_register_address(state, cfa, info->fpOpcode, info->fpParam);

    if (fpa) {
      bpf_probe_read(&state->fp, sizeof(state->fp), (void*)fpa);
    } else if (info->opcode == UNWIND_OPCODE_BASE_FP) {
      // FP used for recovery, but no new FP value received, clear FP
      state->fp = 0;
    }
  }

  if (!cfa || bpf_probe_read(&state->pc, sizeof(state->pc), (void*)(cfa - 8))) {
  err_native_pc_read:
    increment_metric(metricID_UnwindNativeErrPCRead);
    return ERR_NATIVE_PC_READ;
  }
  state->sp = cfa;

frame_ok:
  increment_metric(metricID_UnwindNativeFrames);
  return ERR_OK;
}
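// For reference, the rt_regs indices used in the UNWIND_COMMAND_SIGNAL case
// above follow the field order of struct sigcontext (x86_64, v6.4):
// [0..7] = r8..r15, [8] = di, [9] = si, [10] = bp, [11] = bx, [12] = dx,
// [13] = ax, [14] = cx, [15] = sp, [16] = ip, [17] = flags.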
#elif defined(__aarch64__)
static ErrorCode unwind_one_frame(u64 pid, u32 frame_idx, struct UnwindState *state, bool* stop) {
  *stop = false;

  u32 unwindInfo = 0;
  int addrDiff = 0;
  u64 rt_regs[34];
  u64 cfa = 0;

  // The relevant executable is compiled with frame pointer omission, so
  // stack deltas need to be retrieved from the relevant map.
  ErrorCode error = get_stack_delta(state->text_section_id, state->text_section_offset,
                                    &addrDiff, &unwindInfo);
  if (error) {
    return error;
  }

  if (unwindInfo & STACK_DELTA_COMMAND_FLAG) {
    switch (unwindInfo & ~STACK_DELTA_COMMAND_FLAG) {
    case UNWIND_COMMAND_SIGNAL:
      // On aarch64 the struct rt_sigframe is at:
      // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm64/kernel/signal.c?h=v6.4#n39
      // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm64/include/uapi/asm/sigcontext.h?h=v6.4#n28
      // offsetof(struct rt_sigframe, uc.uc_mcontext.regs[0]) = 312
      //   = offsetof(struct rt_sigframe, uc)        128
      //   + offsetof(struct ucontext, uc_mcontext)  176
      //   + offsetof(struct sigcontext, regs[0])      8
      if (bpf_probe_read(&rt_regs, sizeof(rt_regs), (void*)(state->sp + 312))) {
        goto err_native_pc_read;
      }
      state->pc = normalize_pac_ptr(rt_regs[32]);
      state->sp = rt_regs[31];
      state->fp = rt_regs[29];
      state->lr = normalize_pac_ptr(rt_regs[30]);
      state->r22 = rt_regs[22];
      state->lr_valid = true;
      goto frame_ok;
    case UNWIND_COMMAND_STOP:
      *stop = true;
      return ERR_OK;
    default:
      return ERR_UNREACHABLE;
    }
  }

  UnwindInfo *info = bpf_map_lookup_elem(&unwind_info_array, &unwindInfo);
  if (!info) {
    increment_metric(metricID_UnwindNativeErrBadUnwindInfoIndex);
    DEBUG_PRINT("Giving up due to invalid unwind info array index");
    return ERR_NATIVE_BAD_UNWIND_INFO_INDEX;
  }

  s32 param = info->param;
  if (info->mergeOpcode) {
    DEBUG_PRINT("AddrDiff %d, merged delta %#02x", addrDiff, info->mergeOpcode);
    if (addrDiff >= (info->mergeOpcode & ~MERGEOPCODE_NEGATIVE)) {
      param += (info->mergeOpcode & MERGEOPCODE_NEGATIVE) ? -8 : 8;
      DEBUG_PRINT("Merged delta match: cfaDelta=%d", unwindInfo);
    }
  }

  // Resolve the frame CFA (previous PC is fixed to CFA) address.
  cfa = unwind_register_address(state, 0, info->opcode, param);

  // Resolve the Return Address: it is either the value of the link register
  // or the stack address where the RA is stored.
  u64 ra = unwind_register_address(state, cfa, info->fpOpcode, info->fpParam);
  if (ra) {
    if (info->fpOpcode == UNWIND_OPCODE_BASE_LR) {
      // Allow LR unwinding only if it's known to be valid: either because
      // it's the topmost user-mode frame, or because it was recovered by the
      // signal trampoline.
      if (!state->lr_valid) {
        increment_metric(metricID_UnwindNativeErrLrUnwindingMidTrace);
        return ERR_NATIVE_LR_UNWINDING_MID_TRACE;
      }

      // Set the return address to the value of the link register.
      state->pc = ra;
    } else {
      DEBUG_PRINT("RA: %016llX", (u64)ra);

      // Read the value of the RA from the stack.
      if (bpf_probe_read(&state->pc, sizeof(state->pc), (void*)ra)) {
        // Error reading memory: mark the RA as invalid.
        ra = 0;
      }
    }

    state->pc = normalize_pac_ptr(state->pc);
  }

  if (!ra) {
  err_native_pc_read:
    // Report the failure to resolve the RA and stop unwinding.
    increment_metric(metricID_UnwindNativeErrPCRead);
    DEBUG_PRINT("Giving up due to failure to resolve RA");
    return ERR_NATIVE_PC_READ;
  }

  // Try to resolve the frame pointer with a simple heuristic for FP-based
  // frames: GCC usually generates stack frame records so that the FP/RA pair
  // is at the bottom of a stack frame (the frame record at lower addresses is
  // followed by the stack variables at higher ones). This implies that if no
  // other changes are applied to the stack, such as alloca(), SP/FP after the
  // prologue points to the frame record itself; in such a case the FP offset
  // will be equal to 8.
  if (info->fpParam == 8) {
    // We can assume the presence of frame pointers.
    if (info->fpOpcode != UNWIND_OPCODE_BASE_LR) {
      // FP precedes the RA on the stack (AArch64 ABI requirement).
      bpf_probe_read(&state->fp, sizeof(state->fp), (void*)(ra - 8));
    }
  }

  state->sp = cfa;
  state->lr_valid = false;

frame_ok:
  increment_metric(metricID_UnwindNativeFrames);
  return ERR_OK;
}
#else
  #error unsupported architecture
#endif

// Initialize state from pt_regs
static inline void copy_state_regs(UnwindState *state, struct pt_regs *regs) {
#if defined(__x86_64__)
  state->pc = regs->ip;
  state->sp = regs->sp;
  state->fp = regs->bp;
  state->r13 = regs->r13;
#elif defined(__aarch64__)
  state->pc = normalize_pac_ptr(regs->pc);
  state->sp = regs->sp;
  state->fp = regs->regs[29];
  state->lr = normalize_pac_ptr(regs->regs[30]);
  state->r22 = regs->regs[22];
  state->lr_valid = true;
#endif
}
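// For reference, the 34 u64 values read in the aarch64 UNWIND_COMMAND_SIGNAL
// case earlier mirror struct user_pt_regs: regs[0..30], sp, pc, pstate. Hence
// rt_regs[29] = FP (x29), rt_regs[30] = LR (x30), rt_regs[31] = SP and
// rt_regs[32] = PC: the same registers copy_state_regs recovers from pt_regs.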
#if defined(__aarch64__)
// On ARM64 the size of the pt_regs structure is not constant across kernel
// versions, as it is on x86-64 (!). get_arm64_ptregs_size tries to find out the
// size of this structure with the help of a simple heuristic; this size is
// needed for locating the user process registers on the kernel stack.
static inline u64 get_arm64_ptregs_size(u64 stack_top) {
  // This var should be static, but the verifier complains; in the meantime
  // just leave it here.
  u32 key0 = 0;
  u64 pc, sp;

  u64 *paddr = bpf_map_lookup_elem(&ptregs_size, &key0);
  if (!paddr) {
    DEBUG_PRINT("Failed to look up ptregs_size map");
    return -1;
  }

  // Read the current (possibly cached) value of the pt_regs structure size.
  u64 arm64_ptregs_size = *paddr;

  // If the size of pt_regs has already been established, just return it.
  if (arm64_ptregs_size) return arm64_ptregs_size;

  // Assume the default pt_regs structure size, as for kernel 4.19.120.
  arm64_ptregs_size = sizeof(struct pt_regs);

  // The candidate address where the pt_regs structure may start.
  u64 ptregs_candidate_addr = stack_top - arm64_ptregs_size;
  struct pt_regs *regs = (struct pt_regs*)ptregs_candidate_addr;

  // Read the values of the pc and sp registers.
  if (bpf_probe_read(&pc, sizeof(pc), &regs->pc) ||
      bpf_probe_read(&sp, sizeof(sp), &regs->sp)) {
    goto exit;
  }

  // If pc and sp are kernel pointers, we may assume a correct candidate
  // address of the pt_regs structure (as seen on 4.19.120 and 5.4.38 kernels).
  if (is_kernel_address(pc) && is_kernel_address(sp)) {
    DEBUG_PRINT("default pt_regs struct size");
    goto exit;
  }

  // This is likely the pt_regs structure as present in kernel 5.10.0-7,
  // with two extra fields:
  //   u64 lockdep_hardirqs;
  //   u64 exit_rcu;
  // Adjust arm64_ptregs_size for that.
  DEBUG_PRINT("adjusted pt_regs struct size");
  arm64_ptregs_size += 2*sizeof(u64);

exit:
  // Update the map (cache the value).
  if (bpf_map_update_elem(&ptregs_size, &key0, &arm64_ptregs_size, BPF_ANY)) {
    DEBUG_PRINT("Failed to update ptregs_size map");
  }
  return arm64_ptregs_size;
}
#endif

// Convert a kernel stack pointer to the pt_regs at the top-of-stack.
static inline void *get_kernel_stack_ptregs(u64 addr) {
#if defined(__x86_64__)
  // Fortunately on x86_64 IRQ_STACK_SIZE == THREAD_SIZE. Both depend on
  // CONFIG_KASAN, but that can be assumed off on all production systems.
  // The thread kernel stack should be aligned at least to THREAD_SIZE so the
  // below calculation should yield the correct end of stack.
  u64 stack_end = (addr | (THREAD_SIZE - 1)) + 1;
  return (void*)(stack_end - sizeof(struct pt_regs));
#elif defined(__aarch64__)
  u64 stack_end = (addr | (THREAD_SIZE - 1)) + 1;
  u64 ptregs_size = get_arm64_ptregs_size(stack_end);
  return (void*)(stack_end - ptregs_size);
#endif
}
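// Example of the alignment trick in get_kernel_stack_ptregs: with
// THREAD_SIZE = 16384 (0x4000), a stack address 0xffff800012345678 yields
// stack_end = (addr | 0x3fff) + 1 = 0xffff800012348000, i.e. the end of the
// THREAD_SIZE-aligned region containing the address.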
// Extract the usermode pt_regs for the given context.
//
// State registers are not touched (get_pristine_per_cpu_record already reset them)
// if something fails. has_usermode_regs receives a boolean indicating whether a
// user-mode register context was found: not every thread that we interrupt will
// actually have a user-mode context (e.g. kernel worker threads won't).
static inline ErrorCode get_usermode_regs(struct pt_regs *ctx,
                                          UnwindState *state,
                                          bool *has_usermode_regs) {
  if (is_kernel_address(ctx->sp)) {
    // We are on a kernel mode stack. There are several different kinds of
    // kernel stacks. See get_stack_info() in arch/x86/kernel/dumpstack_64.c
    //  1) Exception stack. We don't expect to see these.
    //  2) IRQ stack. Used for hard and soft irqs. We can see them in softirq
    //     context. The top-of-stack contains a pointer to the previous stack
    //     (always kernel). Size on x86_64 is IRQ_STACK_SIZE.
    //  3) Entry stack. Used for kernel code when the application does a syscall.
    //     The top-of-stack contains the user mode pt_regs. Size is THREAD_SIZE.
    //
    // We expect to be on the Entry stack, or inside a one-level IRQ stack which
    // was triggered by executing softirq work on a hardirq stack.
    //
    // To optimize code, we always read a full pt_regs from the top of the kernel
    // stack. The last word of pt_regs is 'ss', which can be used to distinguish
    // whether we are on an IRQ stack (it's actually the link pointer to the
    // previous stack) or the entry stack (a real SS), depending on whether it
    // looks like a real descriptor.
    struct pt_regs regs;
    if (bpf_probe_read(&regs, sizeof(regs), get_kernel_stack_ptregs(ctx->sp))) {
      increment_metric(metricID_UnwindNativeErrReadKernelModeRegs);
      return ERR_NATIVE_READ_KERNELMODE_REGS;
    }
#if defined(__x86_64__)
    if (is_kernel_address(regs.ss)) {
      // ss looks like a kernel address. It must be the IRQ stack's link to the
      // previous kernel stack. In our case it should be the kernel Entry stack.
      DEBUG_PRINT("Chasing IRQ stack link, ss=0x%lx", regs.ss);
      if (bpf_probe_read(&regs, sizeof(regs), get_kernel_stack_ptregs(regs.ss))) {
        increment_metric(metricID_UnwindNativeErrChaseIrqStackLink);
        return ERR_NATIVE_CHASE_IRQ_STACK_LINK;
      }
    }
    // On x86_64 the user mode SS is practically always 0x2B. But this allows
    // for some flexibility. Expect the RPL (requested privilege level) to be
    // user mode (3), and the selector to be under the max GDT value of 16.
    if ((regs.ss & 3) != 3 || regs.ss >= 16*8) {
      DEBUG_PRINT("No user mode stack, ss=0x%lx", regs.ss);
      *has_usermode_regs = false;
      return ERR_OK;
    }
    DEBUG_PRINT("Kernel mode regs (ss=0x%lx)", regs.ss);
#elif defined(__aarch64__)
    // For backwards compatibility aarch64 can run 32-bit code.
    // Check if the process is running in this 32-bit compat mode.
    if ((regs.pstate & (PSR_MODE32_BIT | PSR_MODE_MASK)) == (PSR_MODE32_BIT | PSR_MODE_EL0t)) {
      return ERR_NATIVE_AARCH64_32BIT_COMPAT_MODE;
    }
#endif
    copy_state_regs(state, &regs);
  } else {
    // User mode code was interrupted: registers are available via the ebpf context.
    copy_state_regs(state, ctx);
  }

  DEBUG_PRINT("Read regs: pc: %llx sp: %llx fp: %llx", state->pc, state->sp, state->fp);
  *has_usermode_regs = true;
  return ERR_OK;
}
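// unwind_native below unwinds at most NATIVE_FRAMES_PER_PROGRAM frames per
// invocation and then tail-calls itself (or the next unwinder) via tail_call.
// Splitting the frame loop across tail calls keeps each individual program
// small enough for the eBPF verifier's complexity limits.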
SEC("perf_event/unwind_native")
int unwind_native(struct pt_regs *ctx) {
  PerCPURecord *record = get_per_cpu_record();
  if (!record)
    return -1;

  Trace *trace = &record->trace;
  int unwinder;
  ErrorCode error;
#pragma unroll
  for (int i = 0; i < NATIVE_FRAMES_PER_PROGRAM; i++) {
    unwinder = PROG_UNWIND_STOP;

    // Unwind native code
    u32 frame_idx = trace->stack_len;
    DEBUG_PRINT("==== unwind_native %d ====", frame_idx);
    increment_metric(metricID_UnwindNativeAttempts);

    // Push the frame first. The PC is valid because a text section mapping was found.
    DEBUG_PRINT("Pushing %llx %llx to position %u on stack",
                record->state.text_section_id, record->state.text_section_offset,
                trace->stack_len);
    error = push_native(trace, record->state.text_section_id,
                        record->state.text_section_offset);
    if (error) {
      DEBUG_PRINT("failed to push native frame");
      break;
    }

    // Unwind the native frame using stack deltas. Stop if there is no next frame.
    bool stop;
    error = unwind_one_frame(trace->pid, frame_idx, &record->state, &stop);
    if (error || stop) {
      break;
    }

    // Continue unwinding
    DEBUG_PRINT(" pc: %llx sp: %llx fp: %llx",
                record->state.pc, record->state.sp, record->state.fp);
    error = get_next_unwinder_after_native_frame(record, &unwinder);
    if (error || unwinder != PROG_UNWIND_NATIVE) {
      break;
    }
  }

  // A tail call is needed for recursion, for switching to the interpreter
  // unwinder, or for reporting the trace due to end-of-trace or error.
  // The unwinder program index is set accordingly.
  record->state.unwind_error = error;
  tail_call(ctx, unwinder);
  DEBUG_PRINT("bpf_tail call failed for %d in unwind_native", unwinder);
  return -1;
}

static inline int collect_trace(struct pt_regs *ctx) {
  // Get the PID and TGID of the current task.
  u64 id = bpf_get_current_pid_tgid();
  u64 pid = id >> 32;

  if (pid == 0) {
    return 0;
  }

  DEBUG_PRINT("==== do_perf_event ====");

  // The trace is reused on each call to this function, so we have to reset the
  // variables used to maintain state.
  DEBUG_PRINT("Resetting CPU record");
  PerCPURecord *record = get_pristine_per_cpu_record();
  if (!record) {
    return -1;
  }

  Trace *trace = &record->trace;
  trace->pid = pid;
  trace->ktime = bpf_ktime_get_ns();
  if (bpf_get_current_comm(&(trace->comm), sizeof(trace->comm)) < 0) {
    increment_metric(metricID_ErrBPFCurrentComm);
  }

  // Get the kernel mode stack trace first.
  trace->kernel_stack_id = bpf_get_stackid(ctx, &kernel_stackmap, BPF_F_REUSE_STACKID);
  DEBUG_PRINT("kernel stack id = %d", trace->kernel_stack_id);

  // Recursively unwind the frames.
  int unwinder = PROG_UNWIND_STOP;
  bool has_usermode_regs;
  ErrorCode error = get_usermode_regs(ctx, &record->state, &has_usermode_regs);
  if (error || !has_usermode_regs) {
    goto exit;
  }

  if (!pid_information_exists(ctx, pid)) {
    if (report_pid(ctx, pid, true)) {
      increment_metric(metricID_NumProcNew);
    }
    return 0;
  }
  error = get_next_unwinder_after_native_frame(record, &unwinder);

exit:
  record->state.unwind_error = error;
  tail_call(ctx, unwinder);
  DEBUG_PRINT("bpf_tail call failed for %d in native_tracer_entry", unwinder);
  return -1;
}

SEC("perf_event/native_tracer_entry")
int native_tracer_entry(struct bpf_perf_event_data *ctx) {
  return collect_trace((struct pt_regs*) &ctx->regs);
}
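// Overall flow: the perf event enters at native_tracer_entry, collect_trace
// records the kernel stack id and the user-mode registers, and then tail-calls
// into the native unwinder; unwind_native pushes frames and re-tail-calls
// until an interpreter unwinder takes over, UNWIND_COMMAND_STOP is hit, or an
// error ends the trace.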
- "漢字路" 한글한자자동변환 서비스는 교육부 고전문헌국역지원사업의 지원으로 구축되었습니다.
- "漢字路" 한글한자자동변환 서비스는 전통문화연구회 "울산대학교한국어처리연구실 옥철영(IT융합전공)교수팀"에서 개발한 한글한자자동변환기를 바탕하여 지속적으로 공동 연구 개발하고 있는 서비스입니다.
- 현재 고유명사(인명, 지명등)을 비롯한 여러 변환오류가 있으며 이를 해결하고자 많은 연구 개발을 진행하고자 하고 있습니다. 이를 인지하시고 다른 곳에서 인용시 한자 변환 결과를 한번 더 검토하시고 사용해 주시기 바랍니다.
- 변환오류 및 건의,문의사항은 juntong@juntong.or.kr로 메일로 보내주시면 감사하겠습니다. .
Copyright ⓒ 2020 By '전통문화연구회(傳統文化硏究會)' All Rights reserved.
 한국   대만   중국   일본