python开源爬虫框架scrapy源码解析(二)
程序员文章站
2024-02-18 14:54:16
...
看过上一节的就应该能够了解到scrapy的命令都是通过commands模块实现,startproject是通过scrapy/commands/startproject.py实现。
下面是对startproject.py的run方法的介绍:
def run(self, args, opts):
    """Create a new project directory from the project template tree.

    Args:
        args: positional command-line arguments; must contain exactly one
            item, which is used as the project name.
        opts: parsed command-line options (not used by this method).

    Raises:
        UsageError: if ``args`` does not contain exactly one item.

    If the project name fails ``self._is_valid_name``, ``self.exitcode`` is
    set to 1 and the method returns without creating anything.
    """
    if len(args) != 1:
        raise UsageError()
    project_name = args[0]  # the single argument is the project name

    # Validate the project name; report failure through the exit code.
    if not self._is_valid_name(project_name):
        self.exitcode = 1
        return

    # Copy the project template tree into a new directory named after the
    # project.
    copytree(self.templates_dir, project_name, ignore=IGNORE)
    # Rename the 'module' entry of the copied tree to the project name.
    move(join(project_name, 'module'), join(project_name, project_name))

    for paths in TEMPLATES_TO_RENDER:
        path = join(*paths)
        # Replace ${project_name} in the template's relative path.
        tplfile = join(
            project_name,
            string.Template(path).substitute(project_name=project_name))
        # Render the template file itself.
        render_templatefile(tplfile, project_name=project_name,
                            ProjectName=string_camelcase(project_name))

    print("New Scrapy project %r, using template directory %r, created in:" %
          (project_name, self.templates_dir))
    print(" %s\n" % abspath(project_name))
    print("You can start your first spider with:")
    print(" cd %s" % project_name)
    print(" scrapy genspider example example.com")
render_templatefile()函数的作用是渲染模板文件:读取模板内容并替换其中的变量,将结果写入去掉.tmpl后缀的同名文件,然后删除原来的.tmpl模板文件。
def render_templatefile(path, **kwargs):
    """Render the template file at ``path`` using ``string.Template``.

    The file is read as UTF-8, ``$name`` / ``${name}`` placeholders are
    substituted from ``kwargs``, and the result is written as UTF-8 to the
    same path with a trailing ``.tmpl`` suffix removed. If ``path`` ends
    with ``.tmpl`` the template file is deleted afterwards; otherwise the
    file is overwritten in place.

    Args:
        path: path of the template file to render.
        **kwargs: placeholder names mapped to their replacement values.

    Raises:
        KeyError: if the template references a placeholder that is not
            supplied in ``kwargs`` (raised by ``Template.substitute``).
    """
    # Read the raw template text.
    with open(path, 'rb') as fp:
        raw = fp.read().decode('utf8')

    # Substitute the placeholders (e.g. ${ProjectName}).
    content = string.Template(raw).substitute(**kwargs)

    # The rendered file keeps the same name minus the '.tmpl' suffix.
    render_path = path[:-len('.tmpl')] if path.endswith('.tmpl') else path

    with open(render_path, 'wb') as fp:
        fp.write(content.encode('utf8'))

    # Remove the template once its rendered counterpart has been written.
    if path.endswith('.tmpl'):
        os.remove(path)
转载于:https://my.oschina.net/sojie/blog/650114
下一篇: redis监控状态connect命令