Skip to content

Commit b96b683

Browse files
committed
refactor: implement dynamic package resolution and enhance dependency analysis in Python builder
1 parent 170099d commit b96b683

File tree

4 files changed

+677
-228
lines changed

4 files changed

+677
-228
lines changed

packages/snap/src/cloud/build/builders/python/add-package-to-archive.ts

Lines changed: 52 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,23 +44,66 @@ const addDirectoryToArchive = async (archive: Archiver, baseDir: string, dirPath
4444
)
4545
}
4646

47+
/**
 * Locates a package inside a site-packages directory.
 *
 * Each candidate module name is checked first as a directory
 * (`<sitePackagesDir>/<name>`) and then as a single-file module
 * (`<sitePackagesDir>/<name>.py`). The first match wins.
 *
 * @param sitePackagesDir - Directory to search in.
 * @param packageName - Distribution name being resolved; used as the only
 *   candidate when `moduleNames` is not supplied.
 * @param moduleNames - Importable module names to try, in priority order.
 *   Defaults to `[packageName]`.
 * @returns The path of the first matching directory or `.py` file, or `null`
 *   when no candidate exists.
 */
const findInSitePackages = (
  sitePackagesDir: string,
  packageName: string,
  moduleNames: readonly string[] = [packageName],
): string | null => {
  // Candidates are passed in explicitly: a module-level arrow function has no
  // usable `this`, so looking them up on `this.cache` throws at runtime.
  for (const moduleName of moduleNames) {
    // An empty name would make path.join() return sitePackagesDir itself.
    if (!moduleName) {
      continue
    }

    // Check as directory
    const dirPath = path.join(sitePackagesDir, moduleName)
    if (fs.existsSync(dirPath) && fs.statSync(dirPath).isDirectory()) {
      return dirPath
    }

    // Check as .py file
    const pyPath = path.join(sitePackagesDir, `${moduleName}.py`)
    if (fs.existsSync(pyPath)) {
      return pyPath
    }
  }

  return null
}
67+
4768
export const addPackageToArchive = async (
4869
archive: Archiver,
4970
sitePackagesDir: string,
5071
packageName: string,
5172
): Promise<void> => {
52-
// First try the package name as is
53-
let fullPath = path.join(sitePackagesDir, packageName)
54-
55-
// If not found, try with .py extension
56-
if (!fs.existsSync(fullPath)) {
57-
const pyPath = path.join(sitePackagesDir, `${packageName}.py`)
58-
if (fs.existsSync(pyPath)) {
59-
fullPath = pyPath
73+
// Try dynamic resolution first
74+
let fullPath = findInSitePackages(sitePackagesDir, packageName)
75+
76+
// If dynamic resolution didn't work, try heuristics
77+
if (!fullPath) {
78+
// Generate package name variations to try
79+
const namesToTry = [
80+
packageName,
81+
packageName.replace(/-/g, '_'),
82+
packageName.replace(/_/g, '-'),
83+
packageName.toLowerCase(),
84+
]
85+
86+
// Handle python- prefix
87+
if (packageName.startsWith('python-') || packageName.startsWith('python_')) {
88+
namesToTry.push(packageName.substring(7))
89+
}
90+
91+
for (const name of namesToTry) {
92+
const dirPath = path.join(sitePackagesDir, name)
93+
if (fs.existsSync(dirPath) && fs.statSync(dirPath).isDirectory()) {
94+
fullPath = dirPath
95+
break
96+
}
97+
98+
const pyPath = path.join(sitePackagesDir, `${name}.py`)
99+
if (fs.existsSync(pyPath)) {
100+
fullPath = pyPath
101+
break
102+
}
60103
}
61104
}
62105

63-
if (!fs.existsSync(fullPath)) {
106+
if (!fullPath) {
64107
console.log(colors.yellow(`Warning: Package not found in site-packages: ${packageName}`))
65108
return
66109
}

packages/snap/src/cloud/build/builders/python/index.ts

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ export class PythonBuilder implements StepBuilder {
9090
.replace(/^[0-9]+/g, '') // Remove numeric prefixes
9191
.replace(/[^a-zA-Z0-9._]/g, '_') // Replace any non-alphanumeric characters (except dots) with underscores
9292
.replace(/^_/, ''),
93-
) // Remove leading underscore
93+
)
9494

9595
normalizedStepPath = normalizePythonModulePath
9696
? pathParts.join('.') // Convert path delimiter to dot (python module separator)
@@ -146,31 +146,55 @@ export class PythonBuilder implements StepBuilder {
146146

147147
private async getPythonBuilderData(step: Step): Promise<{ packages: string[]; files: string[] }> {
148148
return new Promise((resolve, reject) => {
149-
const child = spawn('python', [path.join(__dirname, 'python-builder.py'), step.filePath], {
149+
// Pass project directory and entry file explicitly
150+
const child = spawn('python', [
151+
path.join(__dirname, 'python-builder.py'),
152+
this.builder.projectDir,
153+
step.filePath
154+
], {
150155
cwd: this.builder.projectDir,
151156
stdio: [undefined, undefined, 'pipe', 'ipc'],
152157
env: {
153158
...process.env,
154-
PROJECT_ROOT: this.builder.projectDir,
159+
NODE_CHANNEL_FD: '3', // Explicitly set IPC channel
155160
},
156161
})
162+
157163
const err: string[] = []
158164

159165
child.stderr?.on('data', (data) => err.push(data.toString()))
166+
160167
child.on('message', (data: any) => {
161-
// Ensure we have the expected format
162-
if (!data || typeof data !== 'object' || !Array.isArray(data.packages) || !Array.isArray(data.files)) {
168+
// Handle both old format (string array) and new format (with versions)
169+
if (!data || typeof data !== 'object') {
163170
reject(new Error('Invalid response from Python builder'))
164-
} else {
165-
resolve({
166-
packages: data.packages as string[],
167-
files: data.files as string[],
168-
})
171+
return
172+
}
173+
174+
// Extract packages (handle both formats)
175+
let packages: string[] = []
176+
if (Array.isArray(data.packages)) {
177+
packages = data.packages.map((pkg: any) => {
178+
if (typeof pkg === 'string') return pkg
179+
if (pkg && typeof pkg === 'object' && pkg.name) return pkg.name
180+
return null
181+
}).filter(Boolean)
169182
}
183+
184+
// Extract files
185+
const files = Array.isArray(data.files) ? data.files : []
186+
187+
resolve({ packages, files })
188+
})
189+
190+
child.on('error', (error) => {
191+
reject(new Error(`Failed to spawn Python builder: ${error.message}`))
170192
})
193+
171194
child.on('close', (code) => {
172195
if (code !== 0) {
173-
reject(new Error(err.join('')))
196+
const errorMsg = err.join('').trim() || `Python builder exited with code ${code}`
197+
reject(new Error(errorMsg))
174198
}
175199
})
176200
})
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Dynamic Package-to-Module Resolver
4+
Uses importlib.metadata to dynamically resolve package names to their importable modules.
5+
"""
6+
7+
import os
8+
import sys
9+
import json
10+
import importlib.metadata
11+
from typing import Dict, List, Optional, Set
12+
from pathlib import Path
13+
from functools import lru_cache
14+
15+
16+
class PackageModuleResolver:
    """Resolves package distribution names to their importable module names.

    Lookups go through ``importlib.metadata``. Results are cached per
    instance, and every public method returns a fresh list so callers cannot
    corrupt the caches by mutating a result.
    """

    # First path components that are never importable top-level modules.
    _IGNORED_TOP_LEVEL = frozenset({'__pycache__', '__init__'})

    def __init__(self):
        self._cache: Dict[str, List[str]] = {}  # package -> resolved modules
        self._top_level_cache: Dict[str, List[str]] = {}  # package -> metadata lookup
        self._reverse_cache: Dict[str, str] = {}  # module -> package mapping
        self._build_reverse_cache()

    @staticmethod
    def _dist_name(dist) -> Optional[str]:
        """Return the distribution's name, falling back to its metadata."""
        name = getattr(dist, 'name', None)
        if name:
            return name
        # NOTE(review): older importlib.metadata versions may not expose
        # `.name`; the 'Name' metadata field is used as a fallback.
        return dist.metadata['Name']

    def _build_reverse_cache(self):
        """Build a reverse mapping of module names to package names.

        Best-effort: a distribution whose metadata cannot be read is skipped
        so one broken install does not prevent the others from being indexed.
        """
        try:
            for dist in importlib.metadata.distributions():
                try:
                    if not dist.files:
                        continue
                    name = self._dist_name(dist)
                    if not name:
                        continue
                    for module in self._get_top_level_modules(name):
                        self._reverse_cache[module] = name
                except Exception:
                    continue
        except Exception:
            # Enumerating distributions failed; the mapping stays partial.
            pass

    def _get_top_level_modules(self, package_name: str) -> List[str]:
        """Get top-level modules provided by a package.

        Returns an empty list when the package is not installed or provides
        no discoverable modules. Results (including empty ones) are cached in
        a per-instance dict rather than with ``functools.lru_cache``, which
        would key on ``self`` and share one mutable list between callers.
        """
        cached = self._top_level_cache.get(package_name)
        if cached is None:
            cached = self._lookup_top_level_modules(package_name)
            self._top_level_cache[package_name] = cached
        return list(cached)

    def _lookup_top_level_modules(self, package_name: str) -> List[str]:
        """Uncached lookup behind ``_get_top_level_modules``."""
        try:
            dist = importlib.metadata.distribution(package_name)
        except Exception:
            # Covers PackageNotFoundError and malformed names alike.
            return []

        try:
            files = list(dist.files or [])
        except Exception:
            files = []

        # Method 1: a top_level.txt entry in the recorded file list
        for file in files:
            if file.name != 'top_level.txt':
                continue
            try:
                modules = self._parse_top_level(file.read_text())
            except Exception:
                # Unreadable file: fall through to the other methods.
                continue
            if modules:
                return modules

        # Method 2: top_level.txt read through the distribution's metadata
        try:
            modules = self._parse_top_level(dist.read_text('top_level.txt'))
        except Exception:
            modules = []
        if modules:
            return modules

        # Method 3: infer from the recorded file paths
        return self._infer_modules_from_files(files)

    @staticmethod
    def _parse_top_level(content: Optional[str]) -> List[str]:
        """Split top_level.txt content into non-empty, stripped module names."""
        if not content:
            return []
        return [line.strip() for line in content.splitlines() if line.strip()]

    @classmethod
    def _infer_modules_from_files(cls, files) -> List[str]:
        """Derive top-level module names from a distribution's file paths.

        Only the first path component counts: ``pkg/sub/mod.py`` contributes
        ``pkg``, not ``mod``. A path with a single component contributes its
        stem only when it is a ``.py`` file. Components that are not valid
        identifiers (``..``, ``*.dist-info``, ``*.egg-info``) are dropped.
        """
        modules: Set[str] = set()
        for file in files:
            parts = str(file).split('/')
            top = parts[0]
            if len(parts) == 1:
                if not top.endswith('.py'):
                    continue
                top = top[:-3]
            if top.isidentifier() and top not in cls._IGNORED_TOP_LEVEL:
                modules.add(top)
        return sorted(modules)

    def get_module_names(self, package_name: str) -> List[str]:
        """Get the importable module names for a package.

        Tries the exact name, then hyphen/underscore/case variations, and
        finally falls back to name-based guesses when nothing is installed
        under any of those names.
        """
        if package_name in self._cache:
            return list(self._cache[package_name])

        # Try exact name first
        modules = self._get_top_level_modules(package_name)

        # Try variations if exact name didn't work
        if not modules:
            variations = [
                package_name.replace('-', '_'),
                package_name.replace('_', '-'),
                package_name.lower(),
                package_name.upper(),
            ]

            for variant in dict.fromkeys(variations):
                if variant == package_name:
                    continue  # already tried above
                modules = self._get_top_level_modules(variant)
                if modules:
                    break

        # If still no modules found, use heuristics
        if not modules:
            modules = self._heuristic_module_names(package_name)

        self._cache[package_name] = modules
        return list(modules)

    def _heuristic_module_names(self, package_name: str) -> List[str]:
        """Use heuristics to guess module names.

        Returns de-duplicated guesses in priority order.
        """
        guesses: List[str] = []

        # Common transformations
        base_name = package_name.lower()

        # Remove common prefixes ('python-' and 'python_' are both 7 chars)
        if base_name.startswith(('python-', 'python_')):
            base_name = base_name[7:]
        elif base_name.startswith('py'):
            # Special case: PyJWT -> jwt
            possible = base_name[2:]
            if possible:
                guesses.append(possible)

        # Standard transformations
        guesses.extend([
            base_name,
            base_name.replace('-', '_'),
            base_name.replace('_', '-'),
            package_name.replace('-', '_'),
        ])

        # Remove duplicates and empty strings while preserving order
        return list(dict.fromkeys(guess for guess in guesses if guess))

    def find_package_for_module(self, module_name: str) -> Optional[str]:
        """Find the package that provides a given module.

        Dotted names are resolved by their first component. Returns ``None``
        when no installed distribution provides the module.
        """
        # Check cache first
        if module_name in self._reverse_cache:
            return self._reverse_cache[module_name]

        # Check top-level module name
        base_module = module_name.split('.')[0]
        if base_module in self._reverse_cache:
            return self._reverse_cache[base_module]

        # Try to find by checking all distributions
        for dist in importlib.metadata.distributions():
            try:
                name = self._dist_name(dist)
                if not name:
                    continue
                if base_module in self._get_top_level_modules(name):
                    self._reverse_cache[base_module] = name
                    return name
            except Exception:
                continue

        return None

    def generate_package_map(self, packages: List[str]) -> Dict[str, List[str]]:
        """Generate a complete package-to-modules mapping for given packages.

        Packages for which no module names could be produced are omitted.
        """
        mapping: Dict[str, List[str]] = {}
        for package in packages:
            modules = self.get_module_names(package)
            if modules:
                mapping[package] = modules
        return mapping
183+
184+
185+
def main():
    """Command-line interface for testing.

    Modes:
      <package> [...]                resolve each package to its module names
      --find-module <module>         find the package providing a module
      --generate-map [<package>...]  print a package -> modules mapping

    Prints JSON to stdout. Exits with status 1 and a usage message when no
    arguments are given or when --find-module has no module name.
    """
    args = sys.argv[1:]
    wants_find = bool(args) and args[0] == '--find-module'

    # Validate before building the resolver, which scans every installed
    # distribution. A bare --find-module used to fall through and be resolved
    # as if it were a package name.
    if not args or (wants_find and len(args) < 2):
        print("Usage: python package-resolver.py <package_name> [<package_name> ...]")
        print("   or: python package-resolver.py --find-module <module_name>")
        print("   or: python package-resolver.py --generate-map <package1> <package2> ...")
        sys.exit(1)

    resolver = PackageModuleResolver()

    if wants_find:
        # Find package for a module
        module_name = args[1]
        package = resolver.find_package_for_module(module_name)
        result = {
            'module': module_name,
            'package': package
        }
        print(json.dumps(result, indent=2))

    elif args[0] == '--generate-map':
        # Generate complete mapping
        mapping = resolver.generate_package_map(args[1:])
        print(json.dumps(mapping, indent=2))

    else:
        # Get modules for packages
        results = {}
        for package_name in args:
            results[package_name] = resolver.get_module_names(package_name)

        print(json.dumps(results, indent=2))

0 commit comments

Comments
 (0)