목적

특정 프로그램 소스를 파싱하여 소스 내에서 참조하는 소스를 찾아내고,

그 소스에서 다시 참조하는 소스를 재귀적으로 찾아내어, 결국 참조되는 모든 소스를 추출하는 로직을 구현한다.

사실 웹 스크랩핑과 원리는 같다.

 

 

- 탐색해야할 소스가 정리된 엑셀파일 (module_list.xlsx)

 

- 실제 소스가 있는 파일의 경로 샘플

 

- 소스탐색 후 추출한 모듈소스파일의 내역을 저장한 엑셀파일 (Program List.xlsx)

 

 

자! 만들어 보자

 

1. 가상환경 만들고, 가상환경 진입

C:\Work\Python project\소스모듈추출>python -m venv .venv

C:\Work\Python project\소스모듈추출>.venv\Scripts\activate

(.venv) C:\Work\Python project\소스모듈추출>

꼭 가상환경을 만들어 작업할 필요는 없다.

본인은 Python으로 여러 프로그램을 만들거나 테스트하다 보니 독립된 Python 환경에서 작업하는 것을 선호한다.

 

 

2. 엑셀 파일을 핸들링하기 위해 pandas와 openpyxl 설치

(.venv) C:\Work\Python project\소스모듈추출>pip list
Package    Version
---------- -------
pip        23.1.2
setuptools 65.5.0

(.venv) C:\Work\Python project\소스모듈추출>pip install pandas

(.venv) C:\Work\Python project\소스모듈추출>pip install openpyxl


(.venv) C:\Work\Python project\소스모듈추출>pip list
Package         Version
--------------- -------
et-xmlfile      1.1.0
numpy           1.24.3
openpyxl        3.1.2
pandas          2.0.1
pip             23.1.2
python-dateutil 2.8.2
pytz            2023.3
setuptools      65.5.0
six             1.16.0
tzdata          2023.3

 

3. 파싱로직작성 (extract.py)

import os
import pandas as pd
import re
import copy

# Global Variable
# Menu list to scan; the entry-point loop reads its "메뉴경로", "메뉴코드",
# "메뉴이름" and "메뉴URL" columns.
_df = pd.read_excel("module_list.xlsx")
# Root directory of the source tree; module file paths are built under it.
_root = r"C:\SMARTsuite\Project\emro\src\flex"

# Every Module handed to extractModule(), in the order it was visited.
_moduleList = []
# Counter used as the source of Module ids.
_id = 0

# Module Class
class Module():
    """One source file reached while walking the reference tree.

    The constructor arguments are stored in name-mangled (double underscore)
    attributes and exposed through read-only properties; only
    ``parentModule`` can be reassigned after construction.
    """

    def __init__(self, id, menuCd, path, level, parentModule):
        """Store the module's identity and its position in the tree.

        id           -- identifier supplied by the caller
        menuCd       -- menu code the module is listed under
        path         -- full path of the source file
        level        -- depth in the reference tree (callers pass 0 for a root)
        parentModule -- Module that references this one, or None for a root
        """
        self.__id = id
        self.__menuCd = menuCd
        self.__path = path
        self.__level = level
        self.__parentModule = parentModule

    def _relativePath(self):
        """Return the path relative to the global ``_root``.

        Only a leading ``_root`` prefix is stripped; a path that is not under
        ``_root`` is returned unchanged.
        """
        _prefix = _root + "\\"
        if self.__path.startswith(_prefix):
            return self.__path[len(_prefix):]
        return self.__path

    @property
    def id(self):
        return self.__id

    @property
    def menuCd(self):
        return self.__menuCd

    @property
    def path(self):
        return self.__path

    @property
    def package(self):
        """Dotted package of the file, or "" when it sits directly in the root.

        C:\\SMARTsuite\\Project\\emro\\src\\flex\\module\\bp\\esourcing\\venderMgt\\MVendorMgtApp.mxml
        => module.bp.esourcing.venderMgt
        """
        # rpartition yields an empty directory part when there is no
        # separator, instead of chopping the last character off the file name.
        _dir, _, _ = self._relativePath().rpartition("\\")
        return _dir.replace("\\", ".")

    @property
    def name(self):
        """File name including its extension.

        C:\\SMARTsuite\\Project\\emro\\src\\flex\\module\\bp\\esourcing\\venderMgt\\MVendorMgtApp.mxml
        => MVendorMgtApp.mxml
        """
        return self._relativePath().rpartition("\\")[2]

    @property
    def level(self):
        return self.__level

    @property
    def parentModule(self):
        return self.__parentModule

    @parentModule.setter
    def parentModule(self, value):
        self.__parentModule = value

    def __str__(self):
        # Formatting the parent calls its __str__ too, so the output contains
        # the whole ancestor chain.
        return f"\nid : {self.__id}\npath : {self.__path}\nlevel : {self.__level}\nparentModule : {self.__parentModule}"
    
# Module duplicate check
def checkDup(module):
    """Return True when ``module``'s path already appears among its ancestors.

    Walks the ``parentModule`` chain upwards and compares each ancestor's
    ``path`` with the path of ``module``. A match means following this
    reference would loop back onto a file already on the current branch.

    The chain is only read, so it is traversed in place; no copy of the
    ancestors is made.
    """
    _leafModulePath = module.path
    _ancestor = module.parentModule

    # Loop while there is a parent module
    while _ancestor:
        # An ancestor with the same path means this module is a duplicate.
        if _ancestor.path == _leafModulePath:
            return True

        _ancestor = _ancestor.parentModule

    return False

# Build the OS path
def makePath(package, moduleName):
    """Return the path of an existing source file for ``package.moduleName``.

    package    -- dotted package name (e.g. "modules.bp.approval"), or None
                  when the caller could not resolve one
    moduleName -- class / component name without extension

    The package is mapped to a directory under the global ``_root`` and the
    file is looked up first as ``<moduleName>.mxml`` and then as
    ``<moduleName>.as``. Returns the first path that exists, or None when the
    package is None or neither file exists.
    """
    # An unresolved package cannot be mapped to a directory.
    if package is None:
        return None

    _filePath = package.replace(".", "\\")

    for _ext in ("mxml", "as"):
        _path = os.path.join(_root, _filePath, f"{moduleName}.{_ext}")

        if os.path.exists(_path):
            return _path

    return None

# Module extraction

# Namespace prefixes that extractModule() never treats as module references.
_SKIP_PREFIXES = frozenset((
    "mx", "vc", "cc", "ce", "uxcom", "popup", "control", "controls", "common", "amcharts",
))

# Class names that extractModule() ignores when they appear after `new`.
_SKIP_CLASSES = frozenset((
    "SCObject", "SCCollection", "Object", "ArrayCollection", "Boolean", "Date",
    "SCEvent", "CloseEvent", "RegExp", "Event", "FlexEvent", "SCExcelUploader",
    "URLRequest", "Array", "String", "SCDataGridColumn", "SCDataGridDumyColumn",
    "SCHierarchicalData", "ClassFactory", "SCTextInput", "LineLink",
    "SCServiceInput", "SCServiceOutput", "SCPopupSelectorEvent", "XMLListCollection",
))


def _stripComments(text):
    """Return ``text`` without <!-- -->, /* */ and // comments."""
    text = re.sub(r"<!--.*?-->", "", text, flags=re.DOTALL)     # <!-- -->
    text = re.sub(r"/\*.*?\*/", "", text, flags=re.DOTALL)      # /* */
    # The lookbehind leaves "://" alone without consuming the character in
    # front of the comment, so "new Foo();// note" keeps its ";" and a comment
    # at the very start of the text is removed as well.
    return re.sub(r"(?<!:)//.*", "", text)                      # //


def extractModule(module):
    """Record ``module`` and recursively visit every module it references.

    The module is appended to the global ``_moduleList``; its file is read as
    UTF-8, comments are stripped, and references are collected from
    namespaced tags (``<prefix:Name ...>``) and from ``new Name(...);``
    expressions. Each reference that makePath() resolves to an existing file
    becomes a child Module one level deeper, with a fresh id taken from the
    global ``_id`` counter. Children whose path already occurs among their
    ancestors are skipped to stop circular references.

    Raises whatever ``open``/``read`` raise for a missing or non-UTF-8 file.
    """
    global _id

    # 0. Save Module
    _moduleList.append(module)

    # 1. read the source and remove comments; the file is closed before
    #    recursing so open handles do not pile up with the recursion depth
    with open(module.path, encoding="utf-8") as _file:
        _str = _stripComments(_file.read())

    # 2-1. collect namespace prefixes
    # xmlns:vendor="modules.bp.esourcing.vendorMgt.*"
    _prefix = {
        _key: _package
        for _key, _package in re.findall(r'xmlns:(\S+)="(\S+)\.\*"', _str)
        if _key not in _SKIP_PREFIXES
    }

    # 2-2. collect imports
    # import modules.sp.esourcing.vendorMgt.*; => "modules.sp.esourcing.vendorMgt"
    # import modules.bp.eni.setup.stat.PoAssignment; => {"PoAssignment" : "modules.bp.eni.setup.stat"}
    _importList = [module.package]  # the current package is always searched
    _importMap = {}
    for _import in re.findall(r"import\s+(modules\.\S+);", _str):
        if _import.endswith(".*"):
            _importList.append(_import[0:-2])
        else:
            _pkg, _, _cls = _import.rpartition(".")
            _importMap[_cls] = _pkg

    _childPaths = []

    # 3-1. collect modules used in the UI (duplicates removed via the set)
    # <vendor:VendorList ... />
    _tags = {
        m for m in re.findall(r"<([^/](?:(?![<>/\{\}:])\S)+):(\S+)\s+", _str)
        if m[0] not in _SKIP_PREFIXES
    }
    for _key, _moduleName in _tags:
        _package = _prefix.get(_key)
        # A prefix without a "package.*" namespace declaration cannot be
        # resolved to a directory.
        if _package is None:
            continue

        # If the file really exists, it is a module of that package.
        _path = makePath(_package, _moduleName)
        if _path is not None:
            _childPaths.append(_path)

    # 3-2. collect modules used in logic (duplicates removed via the set)
    # var propsReqDetail:ESPropsReqDetail = new ESPropsReqDetail();
    _classNames = {
        m for m in re.findall(r"new\s+(\S+)\(.*\);", _str)
        if m not in _SKIP_CLASSES
    }
    for _className in _classNames:
        _package = _importMap.get(_className)
        # An explicit import pins the package; otherwise every wildcard
        # import (plus the current package) is tried.
        _candidates = [_package] if _package is not None else set(_importList)

        for _candidate in _candidates:
            # If the file really exists, it is a module of that package.
            _path = makePath(_candidate, _className)
            if _path is not None:
                _childPaths.append(_path)

    _children = []
    for _path in _childPaths:
        # Python has no "++" operator ("++_id" is just +(+_id)), so the
        # counter has to be advanced explicitly.
        _id += 1
        _children.append(Module(_id, module.menuCd, _path, module.level + 1, module))

    for _child in _children:
        # Duplicate check (prevents circular references)
        if not checkDup(_child):
            # Recursive call
            extractModule(_child)
                
                
# Entry Point
# One dict per Excel row (keys: menu_path, menu_cd, menu_nm, menu_url),
# kept so rows without any extracted module can still be exported.
_excelRow = []

for i in _df.index:
    _menuPath   = _df["메뉴경로"][i]
    _menuCd     = _df["메뉴코드"][i]
    _menuNm     = _df["메뉴이름"][i]
    _menuUrl    = _df["메뉴URL"][i]

    _excelRow.append({
        "menu_path": _menuPath,
        "menu_cd": _menuCd,
        "menu_nm": _menuNm,
        "menu_url": _menuUrl,
    })

    # debugging: uncomment to stop after the first rows
    # if i == 10:
    #     break

    # debugging: uncomment to process a single menu only
    # if "PLT181200" != _menuCd:
    #     continue

    # Cells that are not text (e.g. NaN for an empty cell) hold no URL.
    if not isinstance(_menuUrl, str):
        continue

    _file = re.findall(r"(\S+\.swf)", _menuUrl)

    # Only URLs that point at a .swf file map to a source module.
    if not _file:
        continue

    # Swap only the trailing "swf" extension for "mxml"; a plain
    # str.replace would also rewrite "swf" inside directory or file names.
    _relPath = _file[0][:-len("swf")] + "mxml"
    _path = os.path.join(_root, _relPath.replace("/", "\\"))

    if os.path.exists(_path):
        # Python has no "++" operator ("++_id" is just +(+_id)), so the
        # counter has to be advanced explicitly.
        _id += 1
        extractModule(Module(_id, _menuCd, _path, 0, None))
        

# Save Excel
# Output columns, in sheet order.
_columns = ("메뉴경로", "메뉴코드", "메뉴이름", "File Depth", "File Path")

# One tuple per output line, laid out like _columns.
_records = []

for _row in _excelRow:
    _menuCells = (_row["menu_path"], _row["menu_cd"], _row["menu_nm"])
    _joined = [_module for _module in _moduleList if _row["menu_cd"] == _module.menuCd]

    if _joined:
        for _module in _joined:
            # Indent to match the level; non-root modules get a branch marker.
            _indent = " " * _module.level
            if _module.level > 0:
                _indent += "└ "

            _records.append(_menuCells + (_module.level, f"{_indent}{_module.path}"))
    else:
        # Nothing joined for this menu: keep the row and show its raw URL.
        _records.append(_menuCells + (0, _row["menu_url"]))

# Turn the row records into one list per column.
_sheet = {
    _column: [_record[_pos] for _record in _records]
    for _pos, _column in enumerate(_columns)
}

pd.DataFrame(_sheet).to_excel(excel_writer="Program List.xlsx", index=False)


# Console Print
for _module in _moduleList:
    _depth = _module.level

    # A blank line separates each root module's tree from the previous one.
    if _depth == 0:
        print()

    # One space per level, then a branch marker for every non-root module.
    _marker = "└ " if _depth > 0 else ""
    _treeLine = f"{' ' * _depth}{_marker}{_module.path}"

    print(_module.menuCd, _depth, _treeLine)

 

 

4. 실행

(.venv) C:\Work\Python project\소스모듈추출>python extract.py

M00000001 0 C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\pr\EMPRReceiptList.mxml
M00000001 1  └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\pr\ESPRReceiptList.mxml
M00000001 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\pr\EPPRReject.mxml
M00000001 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESConditionMatrix.as
M00000001 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESPConditionMatrix.as
M00000001 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\ESAprvDetail.mxml
M00000001 3    └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\ESAprvLineMgt.mxml
M00000001 3    └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\EPAprvInputName.mxml
M00000001 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\pr\ESPRMultiSearchPopup.mxml
M00000001 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\pr\EPPRPurcGrp.mxml

M00000002 0 C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\pr\EMPRModReqList.mxml
M00000002 1  └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\pr\ESPRModReqList.mxml
M00000002 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESConditionMatrix.as
M00000002 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESPConditionMatrix.as
M00000002 1  └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\pr\ESPRModReqInput.mxml
M00000002 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESConditionMatrix.as

M00000003 0 C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EMRfxList.mxml
M00000003 1  └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxVdItem.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESConditionMatrix.as
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\ESAprvInput.mxml
M00000003 3    └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\ESAprvLineMgt.mxml
M00000003 3    └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\EPAprvInputName.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESPConditionMatrix.as
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\EPChoiceVCPopup.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxQtaPFCompare.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxQtaNonPFCompare.mxml
M00000003 3    └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EvalResultPopup.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxQtaCompare.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EPRfxCancel.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\EPVdSelectPopup.mxml
M00000003 1  └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRFxSummary.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRFxCompare.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxQtaPFCompare.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxQtaNonPFCompare.mxml
M00000003 3    └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EvalResultPopup.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESPricingFactorView.mxml
M00000003 1  └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxList.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESConditionMatrix.as
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EPTimeInput.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESPConditionMatrix.as
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\ESAprvDetail.mxml
M00000003 3    └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\ESAprvLineMgt.mxml
M00000003 3    └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\EPAprvInputName.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EPChangeBasePricePop.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EPQtaAbdCause.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EPRfxCancel.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EPVdChrListPopup.mxml
M00000003 1  └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxVdDoc.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESConditionMatrix.as
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\ESAprvInput.mxml
M00000003 3    └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\ESAprvLineMgt.mxml
M00000003 3    └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\approval\EPAprvInputName.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\comm\ESPConditionMatrix.as
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxQtaPFCompare.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxQtaNonPFCompare.mxml
M00000003 3    └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EvalResultPopup.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\ESRfxQtaCompare.mxml
M00000003 2   └ C:\SMARTsuite\Project\emro\src\flex\modules\bp\procurement\rfx\EPRfxCancel.mxml

 

위 결과처럼 콘솔로 출력도 하지만, 앞서 목표한 대로 엑셀로도 저장되었다.