Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ jobs:
# Package the amalgamated sources for the release. The amalgamation output
# ships the C API as the standalone header merve_c.h, so that is the file
# that has to be zipped and copied next to merve.h / merve.cpp.
- name: Create singleheader.zip
  run: |
    cd build/singleheader
    zip singleheader.zip merve.h merve.cpp
    zip singleheader.zip merve.h merve.cpp merve_c.h
    mv singleheader.zip ../../singleheader/
    cp merve.h merve.cpp ../../singleheader/
    cp merve.h merve.cpp merve_c.h ../../singleheader/
- name: Create release
run: gh release upload "$RELEASE_TAG" singleheader/*
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ if(NOT MERVE_COVERAGE AND NOT EMSCRIPTEN)
endif()

# Install the public headers (the C++ API header and the C API header) into
# the configured include directory as part of the merve_development component.
install(
FILES include/merve.h
FILES include/merve.h include/merve_c.h
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
COMPONENT merve_development
)
Expand Down
94 changes: 92 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ A fast C++ lexer for extracting named exports from CommonJS modules. This librar
- **Source Locations**: Each export includes a 1-based line number for tooling integration
- **Unicode Support**: Properly unescapes JavaScript string literals including `\u{XXXX}` and surrogate pairs
- **Optional SIMD Acceleration**: Can use [simdutf](https://github.com/simdutf/simdutf) for faster string operations
- **C API**: Full C interface (`merve_c.h`) for use from C, FFI, or other languages
- **No Dependencies**: Single-header distribution available (simdutf is optional)
- **Cross-Platform**: Works on Linux, macOS, and Windows

Expand All @@ -31,6 +32,7 @@ target_link_libraries(your_target PRIVATE lexer::lexer)
### Single Header

Copy `singleheader/merve.h` and `singleheader/merve.cpp` to your project.
The C API header `singleheader/merve_c.h` is also included in the distribution.

## Usage

Expand Down Expand Up @@ -130,6 +132,95 @@ const std::optional<lexer_error>& get_last_error();

Returns the last parse error, if any.

## C API

merve provides a C API (`merve_c.h`) for use from C programs, FFI bindings, or any language that can call C functions. The C API is compiled into the merve library alongside the C++ implementation.

### C API Usage

```c
#include "merve_c.h"
#include <stdio.h>
#include <string.h> /* strlen */

/* Parse a small CommonJS snippet and print every named export with its line. */
int main(void) {
  const char* source = "exports.foo = 1;\nexports.bar = 2;\n";

  /* Export names may point into `source`, so the buffer must stay alive
     until the handle is freed. */
  merve_analysis result = merve_parse_commonjs(source, strlen(source));

  if (merve_is_valid(result)) {
    size_t count = merve_get_exports_count(result);
    printf("Found %zu exports:\n", count);
    for (size_t i = 0; i < count; i++) {
      merve_string name = merve_get_export_name(result, i);
      uint32_t line = merve_get_export_line(result, i);
      /* merve_string is not null-terminated: always print with a length. */
      printf(" - %.*s (line %u)\n", (int)name.length, name.data, (unsigned)line);
    }
  } else {
    printf("Parse error: %d\n", merve_get_last_error());
  }

  /* Safe even if the handle is NULL. */
  merve_free(result);
  return 0;
}
```
Output:
```
Found 2 exports:
- foo (line 1)
- bar (line 2)
```
### C API Reference
#### Types
| Type | Description |
|------|-------------|
| `merve_string` | Non-owning string reference (`data` + `length`). Not null-terminated. |
| `merve_analysis` | Opaque handle to a parse result. Must be freed with `merve_free()`. |
| `merve_version_components` | Struct with `major`, `minor`, `revision` fields. |
#### Functions
| Function | Description |
|----------|-------------|
| `merve_parse_commonjs(input, length)` | Parse CommonJS source. Returns a handle (NULL only on OOM). |
| `merve_is_valid(result)` | Check if parsing succeeded. NULL-safe. |
| `merve_free(result)` | Free a parse result. NULL-safe. |
| `merve_get_exports_count(result)` | Number of named exports found. |
| `merve_get_reexports_count(result)` | Number of re-export specifiers found. |
| `merve_get_export_name(result, index)` | Get export name at index. Returns `{NULL, 0}` on error. |
| `merve_get_export_line(result, index)` | Get 1-based line number of export. Returns 0 on error. |
| `merve_get_reexport_name(result, index)` | Get re-export specifier at index. Returns `{NULL, 0}` on error. |
| `merve_get_reexport_line(result, index)` | Get 1-based line number of re-export. Returns 0 on error. |
| `merve_get_last_error()` | Last error code (`MERVE_ERROR_*`), or -1 if no error. |
| `merve_get_version()` | Version string (e.g. `"1.0.1"`). |
| `merve_get_version_components()` | Version as `{major, minor, revision}`. |
#### Error Constants
| Constant | Value | Description |
|----------|-------|-------------|
| `MERVE_ERROR_UNEXPECTED_ESM_IMPORT` | 10 | Found ESM `import` declaration |
| `MERVE_ERROR_UNEXPECTED_ESM_EXPORT` | 11 | Found ESM `export` declaration |
| `MERVE_ERROR_UNEXPECTED_ESM_IMPORT_META` | 9 | Found `import.meta` |
| `MERVE_ERROR_UNTERMINATED_STRING_LITERAL` | 6 | Unclosed string literal |
| `MERVE_ERROR_UNTERMINATED_TEMPLATE_STRING` | 5 | Unclosed template literal |
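| `MERVE_ERROR_UNTERMINATED_REGEX` | 8 | Unclosed regular expression |
| `MERVE_ERROR_UNTERMINATED_REGEX_CHARACTER_CLASS` | 7 | Unclosed regular expression character class |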
| `MERVE_ERROR_UNEXPECTED_PAREN` | 1 | Unexpected `)` |
| `MERVE_ERROR_UNEXPECTED_BRACE` | 2 | Unexpected `}` |
| `MERVE_ERROR_UNTERMINATED_PAREN` | 3 | Unclosed `(` |
| `MERVE_ERROR_UNTERMINATED_BRACE` | 4 | Unclosed `{` |
| `MERVE_ERROR_TEMPLATE_NEST_OVERFLOW` | 12 | Template literal nesting too deep |
#### Lifetime Rules
- The `merve_analysis` handle must be freed with `merve_free()`.
- `merve_string` values returned by accessors are valid as long as the handle has not been freed.
- For exports backed by a `string_view` (most identifiers), the original source buffer must also remain valid.
- All functions are NULL-safe: passing NULL returns safe defaults (false, 0, `{NULL, 0}`).
## Supported Patterns
### Direct Exports
Expand Down Expand Up @@ -243,8 +334,7 @@ cmake --build .
### Running Tests

```bash
cmake --build . --target real_world_tests
./tests/real_world_tests
ctest --test-dir build
```

### Build Options
Expand Down
171 changes: 171 additions & 0 deletions include/merve_c.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
/**
 * @file merve_c.h
 * @brief Public C interface to merve. This is a C file, not C++.
 *
 * Only plain C types are used. When the header is included from C++, the
 * function declarations are wrapped in extern "C" so they keep C linkage.
 */
#ifndef MERVE_C_H
#define MERVE_C_H

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/**
 * @brief Non-owning string reference.
 *
 * The data pointer is NOT null-terminated. Always use the length field
 * (for example, print with "%.*s").
 *
 * The data is valid as long as:
 * - The merve_analysis handle that produced it has not been freed.
 * - For string_view-backed exports: the original source buffer is alive.
 */
typedef struct {
const char* data;
size_t length;
} merve_string;

/**
 * @brief Opaque handle to a CommonJS parse result.
 *
 * Created by merve_parse_commonjs(). Must be freed with merve_free().
 *
 * The handle is an untyped pointer, so the compiler cannot detect an
 * unrelated pointer being passed by mistake. Only pass values obtained from
 * merve_parse_commonjs() (or NULL where a function documents it).
 */
typedef void* merve_analysis;

/**
 * @brief Version number components.
 */
typedef struct {
int major;
int minor;
int revision;
} merve_version_components;

/*
 * Error codes corresponding to lexer::lexer_error values.
 *
 * These are the values reported by merve_get_last_error(). A successful
 * parse is reported as -1, which is deliberately not one of these constants.
 */
#define MERVE_ERROR_TODO 0
#define MERVE_ERROR_UNEXPECTED_PAREN 1
#define MERVE_ERROR_UNEXPECTED_BRACE 2
#define MERVE_ERROR_UNTERMINATED_PAREN 3
#define MERVE_ERROR_UNTERMINATED_BRACE 4
#define MERVE_ERROR_UNTERMINATED_TEMPLATE_STRING 5
#define MERVE_ERROR_UNTERMINATED_STRING_LITERAL 6
#define MERVE_ERROR_UNTERMINATED_REGEX_CHARACTER_CLASS 7
#define MERVE_ERROR_UNTERMINATED_REGEX 8
#define MERVE_ERROR_UNEXPECTED_ESM_IMPORT_META 9
#define MERVE_ERROR_UNEXPECTED_ESM_IMPORT 10
#define MERVE_ERROR_UNEXPECTED_ESM_EXPORT 11
#define MERVE_ERROR_TEMPLATE_NEST_OVERFLOW 12

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Parse CommonJS source code and extract export information.
 *
 * The source buffer must remain valid while accessing string_view-backed
 * export names from the returned handle.
 *
 * You must call merve_free() on the returned handle when done, whether or
 * not parsing succeeded.
 *
 * @param input Pointer to the JavaScript source (need not be null-terminated).
 *              NULL is treated as an empty string.
 * @param length Length of the input in bytes.
 * @return A handle to the parse result, or NULL on out-of-memory.
 *         Use merve_is_valid() to check if parsing succeeded.
 */
merve_analysis merve_parse_commonjs(const char* input, size_t length);

/**
 * Check whether the parse result is valid (parsing succeeded).
 *
 * @param result Handle returned by merve_parse_commonjs(). NULL returns false.
 * @return true if parsing succeeded, false otherwise.
 */
bool merve_is_valid(merve_analysis result);

/**
 * Free a parse result and all associated memory.
 *
 * After this call, every merve_string obtained from the handle is dangling
 * and must not be used.
 *
 * @param result Handle returned by merve_parse_commonjs(). NULL is a no-op.
 */
void merve_free(merve_analysis result);

/**
 * Get the number of named exports found.
 *
 * @param result A parse result handle. NULL returns 0.
 * @return Number of exports, or 0 if result is NULL or invalid.
 */
size_t merve_get_exports_count(merve_analysis result);

/**
 * Get the number of re-export module specifiers found.
 *
 * @param result A parse result handle. NULL returns 0.
 * @return Number of re-exports, or 0 if result is NULL or invalid.
 */
size_t merve_get_reexports_count(merve_analysis result);

/**
 * Get the name of an export at the given index.
 *
 * The returned string is not null-terminated; use its length field.
 *
 * @param result A valid parse result handle.
 * @param index Zero-based index (must be < merve_get_exports_count()).
 * @return Non-owning string reference. Returns {NULL, 0} on error
 *         (presumably a NULL/invalid handle or an out-of-range index;
 *         confirm against the implementation).
 */
merve_string merve_get_export_name(merve_analysis result, size_t index);

/**
 * Get the 1-based source line number of an export.
 *
 * @param result A valid parse result handle.
 * @param index Zero-based index (must be < merve_get_exports_count()).
 * @return 1-based line number, or 0 on error. Since valid lines start at 1,
 *         0 never denotes a real line.
 */
uint32_t merve_get_export_line(merve_analysis result, size_t index);

/**
 * Get the module specifier of a re-export at the given index.
 *
 * The returned string is not null-terminated; use its length field.
 *
 * @param result A valid parse result handle.
 * @param index Zero-based index (must be < merve_get_reexports_count()).
 * @return Non-owning string reference. Returns {NULL, 0} on error
 *         (presumably a NULL/invalid handle or an out-of-range index;
 *         confirm against the implementation).
 */
merve_string merve_get_reexport_name(merve_analysis result, size_t index);

/**
 * Get the 1-based source line number of a re-export.
 *
 * @param result A valid parse result handle.
 * @param index Zero-based index (must be < merve_get_reexports_count()).
 * @return 1-based line number, or 0 on error. Since valid lines start at 1,
 *         0 never denotes a real line.
 */
uint32_t merve_get_reexport_line(merve_analysis result, size_t index);

/**
 * Get the error code from the last merve_parse_commonjs() call.
 *
 * @return One of the MERVE_ERROR_* constants, or -1 if the last parse
 *         succeeded.
 * @note This is global state, overwritten by each merve_parse_commonjs() call.
 *       Read it right after the parse it refers to.
 * @note NOTE(review): this header does not say whether the state is
 *       per-thread; confirm before relying on it from multiple threads.
 */
int merve_get_last_error(void);

/**
 * Get the merve library version string.
 *
 * @return Null-terminated version string (e.g. "1.0.1"). Never NULL.
 */
const char* merve_get_version(void);

/**
 * Get the merve library version as individual components.
 *
 * @return Struct with major, minor, and revision fields.
 */
merve_version_components merve_get_version_components(void);

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* MERVE_C_H */
1 change: 1 addition & 0 deletions singleheader/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# Single-header distribution files; all of them live in this directory's
# build tree rather than in the source tree.
set(SINGLEHEADER_FILES
  "${CMAKE_CURRENT_BINARY_DIR}/merve.cpp"
  "${CMAKE_CURRENT_BINARY_DIR}/merve.h"
  "${CMAKE_CURRENT_BINARY_DIR}/merve_c.h"
)
# Flag the files as GENERATED so CMake does not require them to exist when
# the project is configured.
set_property(SOURCE ${SINGLEHEADER_FILES} PROPERTY GENERATED TRUE)

Expand Down
11 changes: 10 additions & 1 deletion singleheader/amalgamate.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
AMALGAMATE_OUTPUT_PATH = os.environ["AMALGAMATE_OUTPUT_PATH"]

# this list excludes the "src/generic headers"
ALLCFILES = ["parser.cpp"]
ALLCFILES = ["parser.cpp", "merve_c.cpp"]

# order matters
ALLCHEADERS = ["merve.h"]
Expand Down Expand Up @@ -138,11 +138,20 @@ def dofile(fid: str, prepath: str, filename: str) -> None:

amal_c.close()

# Copy merve_c.h to the output directory (it is already standalone).
MERVE_C_H_SRC = os.path.join(AMALGAMATE_INCLUDE_PATH, "merve_c.h")
MERVE_C_H_DST = os.path.join(AMALGAMATE_OUTPUT_PATH, "merve_c.h")
if os.path.exists(MERVE_C_H_SRC):
    shutil.copy2(MERVE_C_H_SRC, MERVE_C_H_DST)
    print(f"Copied {MERVE_C_H_SRC} to {MERVE_C_H_DST}")

# Bundle the generated files into singleheader.zip. The archive is opened in
# a `with` block so it is always closed: closing is what writes the ZIP's
# final records, and an unclosed archive can be left truncated or unreadable.
with zipfile.ZipFile(
    os.path.join(AMALGAMATE_OUTPUT_PATH, "singleheader.zip"), "w", zipfile.ZIP_DEFLATED
) as zf:
    zf.write(os.path.join(AMALGAMATE_OUTPUT_PATH, OUTPUT_CPP), OUTPUT_CPP)
    zf.write(os.path.join(AMALGAMATE_OUTPUT_PATH, OUTPUT_H), OUTPUT_H)
    # The C header is optional here: it is only added when a copy is present
    # in the output directory.
    if os.path.exists(MERVE_C_H_DST):
        zf.write(MERVE_C_H_DST, "merve_c.h")


print("Done with all files generation.")
Expand Down
4 changes: 2 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ message(STATUS "CMAKE_BUILD_TYPE : " ${CMAKE_BUILD_TYPE})
# Interface target that exposes this source directory as an include path
# (build tree only).
add_library(merve-include-source INTERFACE)
target_include_directories(merve-include-source INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)
# Interface target that hands the library's source files to its consumers.
add_library(merve-source INTERFACE)
target_sources(merve-source INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>/parser.cpp)
# Keep each full path inside BUILD_INTERFACE: outside the build tree the
# whole entry then evaluates to nothing instead of leaving a stray
# "/parser.cpp" or "/merve_c.cpp" behind.
target_sources(merve-source INTERFACE
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/parser.cpp>
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/merve_c.cpp>
)
target_link_libraries(merve-source INTERFACE merve-include-source)
# The compiled library: the C++ lexer plus the C API wrapper.
add_library(merve parser.cpp)
add_library(merve parser.cpp merve_c.cpp)
target_include_directories(merve PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}> )
target_include_directories(merve PUBLIC "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>")

Expand Down
2 changes: 0 additions & 2 deletions src/lexer.cpp

This file was deleted.

Loading
Loading