목적
특정 프로그램 소스를 파싱하여 그 소스가 참조하는 다른 소스를 찾아내고,
찾아낸 소스가 다시 참조하는 소스를 재귀적으로 따라가며, 결국 참조되는 소스를 모두 찾아내는 로직을 구현한다.
사실 웹 스크랩핑과 원리는 같다.
- 탐색해야할 소스가 정리된 엑셀파일 (module_list.xlsx)
- 실제 소스가 있는 파일의 경로 샘플
- 소스탐색 후 추출한 모듈소스파일의 내역을 저장한 엑셀파일 (Program List.xlsx)
자! 만들어 보자
1. 가상환경 만들고, 가상환경 진입
C:\Work\Python project\소스모듈추출>python -m venv .venv
C:\Work\Python project\소스모듈추출>.venv\Scripts\activate
(.venv) C:\Work\Python project\소스모듈추출>
꼭 가상환경을 만들어 작업할 필요는 없다.
본인은 Python으로 여러 프로그램을 만들거나 테스트하다 보니 독립된 Python 환경에서 작업하는 것을 선호한다.
2. 엑셀파일을 핸들링하기 위해 pandas설치
(.venv) C:\Work\Python project\소스모듈추출>pip list
Package Version
---------- -------
pip 23.1.2
setuptools 65.5.0
(.venv) C:\Work\Python project\소스모듈추출>pip install pandas
(.venv) C:\Work\Python project\소스모듈추출>pip install openpyxl
(.venv) C:\Work\Python project\소스모듈추출>pip list
Package Version
--------------- -------
et-xmlfile 1.1.0
numpy 1.24.3
openpyxl 3.1.2
pandas 2.0.1
pip 23.1.2
python-dateutil 2.8.2
pytz 2023.3
setuptools 65.5.0
six 1.16.0
tzdata 2023.3
3. 파싱로직작성 (extract.py)
import os
import pandas as pd
import re
import copy
# Global Variable
# Menu list to scan; read once, when this module is executed, from a
# workbook path relative to the current working directory.
_df = pd.read_excel("module_list.xlsx")
# Root directory that module packages and menu URLs are resolved against.
_root = r"C:\SMARTsuite\Project\emro\src\flex"
# Every Module recorded by extractModule(), in the order it was visited.
_moduleList = []
# Starting value for the id given to Module objects.
_id = 0
# Module Class
class Module():
    """One source file found while walking a menu's reference tree.

    Attributes are stored under double-underscore (name-mangled) names and
    exposed through read-only properties; only ``parentModule`` can be
    reassigned after construction.

    Args:
        id: Identifier assigned by the caller.
        menuCd: Code of the menu whose tree this file belongs to.
        path: Absolute Windows-style path of the source file.
        level: Depth in the tree (0 for the menu's entry file).
        parentModule: The Module that references this one, or None.
    """

    def __init__(self, id, menuCd, path, level, parentModule):
        # double underscore(__) is private
        self.__id = id
        self.__menuCd = menuCd
        self.__path = path
        self.__level = level
        self.__parentModule = parentModule

    def __relativePath(self):
        """Return the path with the leading ``_root`` directory removed."""
        _prefix = _root + "\\"
        # Only strip the root when it really is the prefix, so the same
        # text appearing deeper in the path is left untouched.
        if self.__path.startswith(_prefix):
            return self.__path[len(_prefix):]
        return self.__path

    @property
    def id(self):
        return self.__id

    @property
    def menuCd(self):
        return self.__menuCd

    @property
    def path(self):
        return self.__path

    @property
    def package(self):
        """Dotted package of the file, relative to ``_root``.

        C:\\SMARTsuite\\Project\\emro\\src\\flex\\module\\bp\\esourcing\\venderMgt\\MVendorMgtApp.mxml
        => module.bp.esourcing.venderMgt

        A file located directly in ``_root`` has the empty package "".
        """
        # rpartition yields an empty directory part when there is no
        # separator, instead of the -1 index that used to chop the last
        # character off the file name.
        _directory = self.__relativePath().rpartition("\\")[0]
        return _directory.replace("\\", ".")

    @property
    def name(self):
        """File name including its extension.

        C:\\SMARTsuite\\Project\\emro\\src\\flex\\module\\bp\\esourcing\\venderMgt\\MVendorMgtApp.mxml
        => MVendorMgtApp.mxml
        """
        return self.__relativePath().rpartition("\\")[2]

    @property
    def level(self):
        return self.__level

    @property
    def parentModule(self):
        return self.__parentModule

    @parentModule.setter
    def parentModule(self, value):
        self.__parentModule = value

    def __str__(self):
        # The parent is rendered through its own __str__, so the output
        # contains the whole ancestor chain.
        return f"\nid : {self.__id}\npath : {self.__path}\nlevel : {self.__level}\nparentModule : {self.__parentModule}"
# Module duplicate check
def checkDup(module):
    """Return True if *module*'s path already occurs among its ancestors.

    Used to stop the recursive walk when a file (directly or indirectly)
    references itself.

    Args:
        module: Object exposing ``path`` and ``parentModule``.

    Returns:
        True when any ancestor reached through ``parentModule`` has the same
        ``path`` as *module*, otherwise False.
    """
    # The chain is only read, so it is walked in place; no copy is needed.
    _ancestor = module.parentModule
    while _ancestor:
        # An ancestor with our own path means a circular reference.
        if _ancestor.path == module.path:
            return True
        _ancestor = _ancestor.parentModule
    return False
# Build the OS path of a module
def makePath(package, moduleName):
    """Return the existing source file for *moduleName* in *package*.

    The dotted package is mapped to a backslash-separated directory below
    ``_root``; ``<moduleName>.mxml`` is tried first, then ``<moduleName>.as``.

    Args:
        package: Dotted package name, or None when the caller could not
            resolve one (e.g. an undeclared namespace prefix).
        moduleName: Class/file name without extension.

    Returns:
        The path of the first candidate that exists on disk, or None when
        *package* is None or neither file exists.
    """
    # An unresolved package cannot name a file; report "not found" instead
    # of failing on None.replace().
    if package is None:
        return None
    _directory = os.path.join(_root, package.replace(".", "\\"))
    for _extension in ("mxml", "as"):
        _path = os.path.join(_directory, f"{moduleName}.{_extension}")
        if os.path.exists(_path):
            return _path
    return None
# Module extraction
# Namespace prefixes that are skipped when collecting xmlns declarations
# and UI tags.
_IGNORED_PREFIXES = frozenset((
    "mx", "vc", "cc", "ce", "uxcom", "popup", "control", "controls",
    "common", "amcharts",
))
# Class names that are skipped when collecting `new Xxx(...)` expressions.
_IGNORED_CLASSES = frozenset((
    "SCObject", "SCCollection", "Object", "ArrayCollection", "Boolean",
    "Date", "SCEvent", "CloseEvent", "RegExp", "Event", "FlexEvent",
    "SCExcelUploader", "URLRequest", "Array", "String", "SCDataGridColumn",
    "SCDataGridDumyColumn", "SCHierarchicalData", "ClassFactory",
    "SCTextInput", "LineLink", "SCServiceInput", "SCServiceOutput",
    "SCPopupSelectorEvent", "XMLListCollection",
))


def _stripComments(source):
    """Return *source* without XML, block and line comments."""
    source = re.sub(r"<!--.*?-->", "", source, flags=re.DOTALL)  # <!-- -->
    source = re.sub(r"/\*.*?\*/", "", source, flags=re.DOTALL)  # /* */
    # The lookbehind keeps "://" (URLs) intact without consuming the
    # character in front of the comment, so `new Foo();// note` keeps its
    # ';' and is still recognised below.
    return re.sub(r"(?<!:)//.*", "", source)  # //


def extractModule(module):
    """Record *module* and, recursively, every module it references.

    The file at ``module.path`` is read as UTF-8, comments are removed, and
    references are collected from two places: namespaced UI tags
    (``<prefix:Name ...>``) and ``new Name(...);`` expressions. Each
    reference that resolves to an existing file through ``makePath`` becomes
    a child Module one level deeper, which is then processed the same way
    unless ``checkDup`` reports a circular reference.

    Side effects:
        Appends to the global ``_moduleList`` and increments the global
        ``_id`` once per child created.

    Args:
        module: The Module to scan.

    Raises:
        OSError, UnicodeDecodeError: propagated from opening/reading the
            file; nothing is caught here.
    """
    global _id

    # 0. Save Module
    _moduleList.append(module)

    # 1. Read the source and remove comments. The file is closed before the
    #    recursion starts, so handles do not accumulate with tree depth.
    with open(module.path, encoding="utf-8") as _file:
        _str = _stripComments(_file.read())

    # 2-1. collect namespace's prefix
    #      xmlns:vendor="modules.bp.esourcing.vendorMgt.*"
    _prefix = {
        _name: _package
        for _name, _package in re.findall(r'xmlns:(\S+)="(\S+)\.\*"', _str)
        if _name not in _IGNORED_PREFIXES
    }

    # 2-2. collect import
    #      import modules.sp.esourcing.vendorMgt.*;       => wildcard list
    #      import modules.bp.eni.setup.stat.PoAssignment; => {"PoAssignment": "modules.bp.eni.setup.stat"}
    _importList = [module.package]  # the current package is always searched
    _importMap = {}
    for _import in re.findall(r"import\s+(modules\.\S+);", _str):
        if _import.endswith(".*"):
            _importList.append(_import[:-2])
        else:
            _package, _, _className = _import.rpartition(".")
            _importMap[_className] = _package

    _childPaths = []

    # 3-1. collect module in UI
    #      <vendor:VendorList ... />
    _tags = re.findall(r"<([^/](?:(?![<>/\{\}:])\S)+):(\S+)\s+", _str)
    # dict.fromkeys removes duplicates but, unlike set(), keeps first-seen
    # order, so the resulting tree is the same on every run.
    for _key, _moduleName in dict.fromkeys(_tags):
        if _key in _IGNORED_PREFIXES:
            continue
        _package = _prefix.get(_key)
        if _package is None:
            # Prefix without a "package.*" xmlns declaration: not a module.
            continue
        _path = makePath(_package, _moduleName)
        # If the file really exists, it is a module of that package.
        if _path is not None:
            _childPaths.append(_path)

    # 3-2. collect module in logic
    #      var propsReqDetail:ESPropsReqDetail = new ESPropsReqDetail();
    _classes = re.findall(r"new\s+(\S+)\(.*\);", _str)
    for _className in dict.fromkeys(_classes):
        if _className in _IGNORED_CLASSES:
            continue
        _package = _importMap.get(_className)
        if _package is not None:
            _candidates = [_package]
        else:
            # No explicit import: try every wildcard-imported package.
            _candidates = list(dict.fromkeys(_importList))
        for _candidate in _candidates:
            _path = makePath(_candidate, _className)
            if _path is not None:
                _childPaths.append(_path)

    # Create all children first, then descend into them.
    _children = []
    for _path in _childPaths:
        # Python has no ++ operator ("++_id" is just +(+_id)), so the
        # counter is advanced explicitly.
        _id += 1
        _children.append(Module(_id, module.menuCd, _path, module.level + 1, module))

    for _child in _children:
        # Skip files already present in their own ancestor chain
        # (prevents infinite recursion on circular references).
        if not checkDup(_child):
            extractModule(_child)
# Entry Point
# Walk every menu row of the workbook; rows whose URL points at a .swf file
# are resolved to the matching .mxml source and scanned recursively.
_excelRow = []
for i in _df.index:
    _menuCd = _df["메뉴코드"][i]
    _menuUrl = _df["메뉴URL"][i]
    _excelRow.append({
        "menu_path": _df["메뉴경로"][i],
        "menu_cd": _menuCd,
        "menu_nm": _df["메뉴이름"][i],
        "menu_url": _menuUrl,
    })
    # Only string URLs are parsed; any other cell value is skipped.
    if not isinstance(_menuUrl, str):
        continue
    _swf = re.findall(r"(\S+\.swf)", _menuUrl)
    if not _swf:
        continue
    # Swap only the trailing extension; a plain replace("swf", "mxml")
    # would also rewrite "swf" inside directory or file names.
    _source = _swf[0][:-len("swf")] + "mxml"
    _path = os.path.join(_root, _source.replace("/", "\\"))
    if os.path.exists(_path):
        # Python has no ++ operator, so advance the counter explicitly.
        _id += 1
        extractModule(Module(_id, _menuCd, _path, 0, None))

# Save Excel
# One output row per (menu, module) pair; menus without any extracted
# module still get a single row that shows their raw URL.
_columns = ("메뉴경로", "메뉴코드", "메뉴이름", "File Depth", "File Path")
_records = []
for _row in _excelRow:
    _menu = (_row["menu_path"], _row["menu_cd"], _row["menu_nm"])
    _joined = [_module for _module in _moduleList if _row["menu_cd"] == _module.menuCd]
    for _module in _joined:
        # Indent according to the level and mark non-root entries.
        _indent = " " * _module.level
        if _module.level > 0:
            _indent += "└ "
        _records.append(_menu + (_module.level, f"{_indent}{_module.path}"))
    # No module joined to this menu
    if not _joined:
        _records.append(_menu + (0, _row["menu_url"]))
_sheet = {
    _column: [_record[_position] for _record in _records]
    for _position, _column in enumerate(_columns)
}
pd.DataFrame(_sheet).to_excel(excel_writer="Program List.xlsx", index=False)

# Console Print
for _module in _moduleList:
    # Blank line before each menu's root entry.
    if _module.level == 0:
        print()
    _indent = " " * _module.level
    if _module.level > 0:
        _indent += "└ "
    print(_module.menuCd, _module.level, f"{_indent}{_module.path}")
4. 실행
(.venv) C:\Work\Python project\소스모듈추출>python extract.py
M00000001 0 C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\pr\EMPRReceiptList.mxml
M00000001 1 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\pr\ESPRReceiptList.mxml
M00000001 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\pr\EPPRReject.mxml
M00000001 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESConditionMatrix.as
M00000001 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESPConditionMatrix.as
M00000001 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\ESAprvDetail.mxml
M00000001 3 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\ESAprvLineMgt.mxml
M00000001 3 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\EPAprvInputName.mxml
M00000001 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\pr\ESPRMultiSearchPopup.mxml
M00000001 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\pr\EPPRPurcGrp.mxml
M00000002 0 C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\pr\EMPRModReqList.mxml
M00000002 1 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\pr\ESPRModReqList.mxml
M00000002 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESConditionMatrix.as
M00000002 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESPConditionMatrix.as
M00000002 1 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\pr\ESPRModReqInput.mxml
M00000002 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESConditionMatrix.as
M00000003 0 C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EMRfxList.mxml
M00000003 1 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxVdItem.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESConditionMatrix.as
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\ESAprvInput.mxml
M00000003 3 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\ESAprvLineMgt.mxml
M00000003 3 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\EPAprvInputName.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESPConditionMatrix.as
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\EPChoiceVCPopup.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxQtaPFCompare.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxQtaNonPFCompare.mxml
M00000003 3 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EvalResultPopup.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxQtaCompare.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EPRfxCancel.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\EPVdSelectPopup.mxml
M00000003 1 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRFxSummary.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRFxCompare.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxQtaPFCompare.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxQtaNonPFCompare.mxml
M00000003 3 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EvalResultPopup.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESPricingFactorView.mxml
M00000003 1 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxList.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESConditionMatrix.as
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EPTimeInput.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESPConditionMatrix.as
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\ESAprvDetail.mxml
M00000003 3 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\ESAprvLineMgt.mxml
M00000003 3 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\EPAprvInputName.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EPChangeBasePricePop.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EPQtaAbdCause.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EPRfxCancel.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EPVdChrListPopup.mxml
M00000003 1 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxVdDoc.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESConditionMatrix.as
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\ESAprvInput.mxml
M00000003 3 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\ESAprvLineMgt.mxml
M00000003 3 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\EPAprvInputName.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESPConditionMatrix.as
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxQtaPFCompare.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxQtaNonPFCompare.mxml
M00000003 3 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EvalResultPopup.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxQtaCompare.mxml
M00000003 2 └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EPRfxCancel.mxml
위 결과처럼 콘솔로도 출력되지만, 앞서 목표한 대로 엑셀 파일(Program List.xlsx)로도 저장되었다.