Skip to content

Commit 7febe01

Browse files
author
wuerror
committed
enhance: add web.xml servlet extraction
1 parent eb427c5 commit 7febe01

4 files changed

Lines changed: 173 additions & 2 deletions

File tree

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,15 @@
22

33
All notable changes to this project will be documented in this file.
44

5+
## [1.5.0] - 2026-02-10
6+
7+
### Added
8+
- **Legacy Web App Support (Phase 10.1)**:
9+
- Implemented `WebXmlParser` to extract Servlet mappings from `web.xml` files in JAR/WAR archives.
10+
- Updated `RouteExtractor` to merge `web.xml` routes with Spring annotation-based routes.
11+
- Enhanced `DiscoveryEngine` to scan all available JARs (including libraries) for `web.xml` definitions, ensuring routes in dependencies (e.g., `bos-resources.jar`) are discovered.
12+
- Fixes issue where legacy Servlet routes were missed in hybrid Spring Boot + Servlet applications.
13+
514
## [1.4.0] - 2026-02-09
615

716
### Added

src/main/java/com/jbytescanner/engine/DiscoveryEngine.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,12 @@ public void run() {
4545
SootManager.initSoot(targetAppJars, combinedLibs, false);
4646

4747
// 2. Extract Routes
48-
RouteExtractor extractor = new RouteExtractor(filterAnnotations);
48+
List<String> scanJars = new ArrayList<>();
49+
if (targetAppJars != null) scanJars.addAll(targetAppJars);
50+
if (depAppJars != null) scanJars.addAll(depAppJars);
51+
if (libJars != null) scanJars.addAll(libJars);
52+
53+
RouteExtractor extractor = new RouteExtractor(filterAnnotations, scanJars);
4954
List<ApiRoute> routes = extractor.extract();
5055

5156
logger.info("Found {} API Routes.", routes.size());

src/main/java/com/jbytescanner/engine/RouteExtractor.java

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,19 @@ public class RouteExtractor {
3636
private static final String ANN_WEB_SERVLET = "javax.servlet.annotation.WebServlet";
3737

3838
private final List<String> filterAnnotations;
39+
private final List<String> scanJars;
3940

40-
public RouteExtractor(List<String> filterAnnotations) {
41+
/**
 * Creates a route extractor.
 *
 * <p>Both lists are kept by reference; no copy is made.
 *
 * @param filterAnnotations annotation names handed to this extractor, stored as given
 * @param scanJars          paths of the archives whose {@code web.xml} descriptors are parsed
 *                          during {@link #extract()}; may be {@code null} or empty, in which
 *                          case no {@code web.xml} routes are produced
 */
public RouteExtractor(List<String> filterAnnotations, List<String> scanJars) {
    this.scanJars = scanJars;
    this.filterAnnotations = filterAnnotations;
}
4345

4446
public List<ApiRoute> extract() {
4547
List<ApiRoute> routes = new ArrayList<>();
4648

49+
// 0. Extract Routes from web.xml (Legacy/Hybrid)
50+
routes.addAll(extractWebXmlRoutes());
51+
4752
for (SootClass sc : Scene.v().getApplicationClasses()) {
4853
if (sc.isPhantom()) continue;
4954

@@ -197,6 +202,39 @@ private List<ApiRoute> extractSpringRoutes(SootClass sc) {
197202
return routes;
198203
}
199204

205+
private List<ApiRoute> extractWebXmlRoutes() {
206+
List<ApiRoute> routes = new ArrayList<>();
207+
if (scanJars == null || scanJars.isEmpty()) return routes;
208+
209+
WebXmlParser parser = new WebXmlParser();
210+
for (String jarPath : scanJars) {
211+
Map<String, List<String>> webXmlRoutes = parser.parse(new java.io.File(jarPath));
212+
213+
for (Map.Entry<String, List<String>> entry : webXmlRoutes.entrySet()) {
214+
String className = entry.getKey();
215+
List<String> paths = entry.getValue();
216+
217+
// Verify if class is in Soot (Optional, but good for validation)
218+
// If the class is a library class, it might not be in "ApplicationClasses" but in "Scene.v().getClasses()"
219+
// We add it regardless, because web.xml is an explicit definition.
220+
221+
for (String path : paths) {
222+
// Create a route for ALL methods since web.xml maps the servlet generally
223+
routes.add(new ApiRoute(
224+
"ALL",
225+
path,
226+
className,
227+
"service", // Method name placeholder
228+
new ArrayList<>(), // No params known from web.xml
229+
new HashMap<>(),
230+
"application/x-www-form-urlencoded"
231+
));
232+
}
233+
}
234+
}
235+
return routes;
236+
}
237+
200238
private String combinePaths(String p1, String p2) {
201239
if (!p1.startsWith("/")) p1 = "/" + p1;
202240
if (!p2.startsWith("/") && !p2.isEmpty()) p2 = "/" + p2;
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
package com.jbytescanner.engine;
2+
3+
import org.slf4j.Logger;
4+
import org.slf4j.LoggerFactory;
5+
import org.w3c.dom.Document;
6+
import org.w3c.dom.Element;
7+
import org.w3c.dom.Node;
8+
import org.w3c.dom.NodeList;
9+
10+
import javax.xml.parsers.DocumentBuilder;
11+
import javax.xml.parsers.DocumentBuilderFactory;
12+
import java.io.File;
13+
import java.io.InputStream;
14+
import java.util.*;
15+
import java.util.jar.JarEntry;
16+
import java.util.jar.JarFile;
17+
18+
public class WebXmlParser {
19+
private static final Logger logger = LoggerFactory.getLogger(WebXmlParser.class);
20+
21+
/**
22+
* Parse web.xml inside a JAR file and return a map of Servlet Class -> List of URL Patterns
23+
*/
24+
public Map<String, List<String>> parse(File jarFile) {
25+
Map<String, List<String>> routes = new HashMap<>();
26+
27+
if (!jarFile.exists() || !jarFile.getName().endsWith(".jar")) {
28+
return routes;
29+
}
30+
31+
try (JarFile jar = new JarFile(jarFile)) {
32+
Enumeration<JarEntry> entries = jar.entries();
33+
while (entries.hasMoreElements()) {
34+
JarEntry entry = entries.nextElement();
35+
String name = entry.getName();
36+
37+
// Recursively search for web.xml (e.g., WEB-INF/web.xml, webapp/web.xml)
38+
// Using exact name match or suffix match to be safe
39+
if (name.endsWith("web.xml") && !name.contains("classes/")) { // Avoid resources inside classes if any
40+
logger.debug("Found web.xml in {}: {}", jarFile.getName(), name);
41+
try (InputStream is = jar.getInputStream(entry)) {
42+
parseWebXml(is, routes);
43+
} catch (Exception e) {
44+
logger.error("Failed to parse web.xml in {}", jarFile.getName(), e);
45+
}
46+
}
47+
}
48+
} catch (Exception e) {
49+
logger.error("Failed to process JAR for web.xml: {}", jarFile.getName(), e);
50+
}
51+
52+
return routes;
53+
}
54+
55+
private void parseWebXml(InputStream is, Map<String, List<String>> routes) throws Exception {
56+
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
57+
// Secure processing to prevent XXE
58+
dbFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
59+
dbFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
60+
dbFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
61+
dbFactory.setNamespaceAware(true); // Handle namespaces if present
62+
63+
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
64+
Document doc = dBuilder.parse(is);
65+
doc.getDocumentElement().normalize();
66+
67+
// 1. Extract Servlets: Name -> Class
68+
Map<String, String> servletNameMap = new HashMap<>();
69+
NodeList servletNodes = doc.getElementsByTagName("servlet"); // Simple tag name search ignores namespace
70+
if (servletNodes.getLength() == 0) {
71+
// Try with namespace awareness if simple search fails (though getElementsByTagName usually works)
72+
servletNodes = doc.getElementsByTagNameNS("*", "servlet");
73+
}
74+
75+
for (int i = 0; i < servletNodes.getLength(); i++) {
76+
Element element = (Element) servletNodes.item(i);
77+
String servletName = getTagValue("servlet-name", element);
78+
String servletClass = getTagValue("servlet-class", element);
79+
80+
if (servletName != null && servletClass != null) {
81+
servletNameMap.put(servletName, servletClass);
82+
}
83+
}
84+
85+
// 2. Extract Mappings: Name -> URL Pattern
86+
NodeList mappingNodes = doc.getElementsByTagName("servlet-mapping");
87+
if (mappingNodes.getLength() == 0) {
88+
mappingNodes = doc.getElementsByTagNameNS("*", "servlet-mapping");
89+
}
90+
91+
for (int i = 0; i < mappingNodes.getLength(); i++) {
92+
Element element = (Element) mappingNodes.item(i);
93+
String servletName = getTagValue("servlet-name", element);
94+
String urlPattern = getTagValue("url-pattern", element);
95+
96+
if (servletName != null && urlPattern != null) {
97+
String servletClass = servletNameMap.get(servletName);
98+
if (servletClass != null) {
99+
routes.computeIfAbsent(servletClass, k -> new ArrayList<>()).add(urlPattern);
100+
}
101+
}
102+
}
103+
}
104+
105+
private String getTagValue(String tag, Element element) {
106+
NodeList nodeList = element.getElementsByTagName(tag);
107+
if (nodeList.getLength() == 0) {
108+
nodeList = element.getElementsByTagNameNS("*", tag);
109+
}
110+
111+
if (nodeList.getLength() > 0) {
112+
Node node = nodeList.item(0).getFirstChild();
113+
if (node != null) {
114+
return node.getNodeValue().trim();
115+
}
116+
}
117+
return null;
118+
}
119+
}

0 commit comments

Comments
 (0)