Write a Zotero Translator for kanyanbao.com
最近一般用 Mendeley 做文献管理, 对于主要的学术期刊网站支持比较好,而且多终端云同步, 缺点是不支持自定义扩展,所以没办法用来记录整理卖方的研究报告。
相比之下,zotero 是一个更具扩展性的平台,可以通过自定义插件来支持特定网站格式。2016 年的时候,为看研报写过一个扩展,但当时是基于 framework 实现的;在 2017 年底,zotero 开始逐渐放弃对 framework 的支持。因为内容比较简单,最近移除了对 framework 的依赖,重新实现了一下。安装好 zotero 的浏览器插件后,在看研报的网站上浏览研报时,无论是摘要页面还是 pdf 浏览页面,zotero 都可以识别出机构、作者、报告标题、时间等,并将 pdf 下载下来。
然而在浏览pdf的页面使用插件,会造成摘要无法正常获取。
How to Use
到 gist 下载最新的版本,存储到 zotero 数据文件夹下的 translators 文件夹即可。如果无法访问 gist,可以复制文末的代码,另存为 kanyanbao.js 即可。
关于写translator有几个比较重要的参考资料:
- https://www.zotero.org/support/dev/translators
- https://www.zotero.org/support/dev/translators/coding
- https://aurimasv.github.io/z2csl/typeMap.xml
Code
You should prefer code on gist if available.
{
"translatorID": "56ebac6c-56f7-4564-ac07-58d6a1b2948a",
"label": "kanyanbao",
"creator": "Shel Kong",
"target": "^https?://(www\\.)?kanyanbao\\.com/",
"minVersion": "3.0",
"maxVersion": "",
"priority": 100,
"inRepository": false,
"translatorType": 6,
"browserSupport": "gcs",
"lastUpdated": "2018-12-13 12:54:50"
}
// Zotero translator for kanyanbao.com
// Supports detail and pdf page for single report
/**
 * Classify the current page by looking for known path fragments in its URL.
 *
 * @param {Document} doc - Page document (not inspected here).
 * @param {string} url - URL of the page.
 * @returns {string|undefined} "detail" for a report detail page, "pdf" for
 *   the PDF frame page, or undefined when the URL matches neither.
 */
function detectWeb(doc, url) {
	var isDetailPage = url.indexOf("report/detail.htm") >= 0;
	if (isDetailPage) {
		return "detail";
	}

	var isPdfPage = url.indexOf("report/frame") >= 0;
	if (isPdfPage) {
		return "pdf";
	}

	return undefined;
}
/**
 * Translator entry point: dispatch to the scraper that matches the page kind
 * reported by detectWeb. Pages detectWeb does not recognise are ignored.
 *
 * @param {Document} doc - Page document.
 * @param {string} url - URL of the page.
 */
function doWeb(doc, url) {
	switch (detectWeb(doc, url)) {
		case "detail":
			scrape_detail(doc, url);
			break;
		case "pdf":
			scrape_pdf(doc, url);
			break;
		default:
			// Unrecognised page: nothing to do.
			break;
	}
}
/**
 * Populate the shared report `item` from a kanyanbao report detail page.
 *
 * This function reads and mutates the file-level `item` variable, which
 * scrape_detail / scrape_pdf assign before calling it. It deliberately takes
 * only `doc`: scrape_pdf hands this function to ZU.processDocuments as a
 * callback, so any further positional arguments are not under our control.
 *
 * Fields that cannot be found on the page are left untouched instead of
 * throwing, so a partially matching page still yields a usable item.
 *
 * @param {Document} doc - Document of the report detail page.
 * @returns {Object} The shared `item`, after population.
 */
function scrape(doc) {
	item.title = ZU.xpathText(doc, '//span[@class="maintitle"]');

	var metarow = ZU.xpath(doc, '//div[@class="brokeranalyst"]')[0];
	var meta = ZU.xpathText(doc, '//div[@class="brokeranalyst"]') || '';
	// NOTE(review): the two span lookups below start with "//", so they search
	// the whole document no matter which context node is passed. They are kept
	// as-is because the page structure is not visible from this file; confirm
	// before scoping them to `metarow` with ".//".
	var metaContext = metarow || doc;
	item.institution = ZU.xpathText(metaContext, '//span[contains(@class, "brokername")]');

	// The first digit/slash run in the meta text is treated as a yy/mm/dd date
	// and expanded to 20yy-mm-dd.
	var dateMatch = meta.match(/[\d\/]+/);
	if (dateMatch) {
		item.date = '20' + dateMatch[0].replace(/\//g, '-');
	}

	var abnode = ZU.xpath(doc, '//div[contains(@class,"fullsummary")]')[0];
	if (abnode) {
		var summary = (ZU.xpathText(abnode, '.') || '').trim();
		// "get_summary('')" is skipped as in the original code; presumably it
		// is the placeholder text of a summary that has not been loaded yet.
		if (summary && summary !== "get_summary('')") {
			item.abstractNote = summary;
		}
		item.reportNumber = abnode.getAttribute('data-docid');
	}

	// Whitespace-separated words before the "| <digit>" separator become tags.
	var tagMatch = meta.match(/([^|]+)\s*\|\s*\d/);
	if (tagMatch) {
		var tags = tagMatch[1].trim().split(/\s/);
		for (var t = 0; t < tags.length; t++) {
			if (tags[t] !== "") {
				item.tags.push(tags[t]);
			}
		}
	}

	var authorText = ZU.xpathText(metaContext, '//span[contains(@class, "analyst")]');
	if (authorText) {
		var authors = ZU.trimInternal(authorText).split(',');
		for (var a = 0; a < authors.length; a++) {
			item.creators.push(ZU.cleanAuthor(authors[a], 'author', false));
		}
	}

	var rows = ZU.xpath(doc, '//div[@class="attachmentrow"]');
	for (var r = 0; r < rows.length; r++) {
		var row = rows[r];
		// ".//" keeps the search inside this row. An absolute "//a" would
		// return the first download link of the document for every row.
		var dlnode = ZU.xpath(row, './/a[text() = "下载"]')[0];
		if (!dlnode) {
			continue;
		}
		var href = dlnode.getAttribute('href');
		if (!href) {
			continue;
		}
		var fname = row.textContent.trim().split(/\s/)[0];
		item.attachments.push({
			title: fname.split(".")[0],
			// Only site-relative hrefs need the host prepended.
			url: /^https?:\/\//.test(href) ? href : 'https://kanyanbao.com' + href,
			mimeType: 'application/pdf',
			snapshot: true
		});
	}
	return item;
}
// detail
/**
 * Save the report shown on a detail page.
 *
 * Creates a fresh "report" item in the file-level `item` variable (the one
 * scrape fills in), records the page URL on it, and completes it.
 *
 * @param {Document} doc - Document of the detail page.
 * @param {string} url - URL of the detail page, stored as the item URL.
 */
function scrape_detail(doc, url) {
	// Intentionally assigns the shared `item`: scrape reads it from there.
	item = new Zotero.Item("report");
	item.url = url;
	scrape(doc, item);
	item.complete();
}
/**
 * Use the full body text of a fetched summary document as the abstract of
 * the shared `item`.
 *
 * @param {Document} doc - Document whose body text is copied verbatim.
 */
function get_summary(doc) {
	var bodyText = doc.body.textContent;
	item.abstractNote = bodyText;
}
// pdf
/**
 * Save the report shown on a PDF frame page.
 *
 * The frame page carries no metadata of its own, so the report's detail page
 * is fetched and passed to scrape, then the summary endpoint is fetched and
 * passed to get_summary, and only then is the item completed.
 *
 * Both follow-up URLs are built on the origin of the current page. The
 * original code used "kanyanbao.com" for one request and "www.kanyanbao.com"
 * for the other, so one of them was always on a different host than the page
 * being viewed.
 *
 * @param {Document} doc - Document of the PDF frame page (not inspected).
 * @param {string} url - URL of the frame page; must contain "docid=<digits>".
 * @throws {Error} When no docid can be extracted from `url`.
 */
function scrape_pdf(doc, url) {
	var idMatch = url.match(/docid=(\d+)/);
	if (!idMatch) {
		throw new Error("kanyanbao: no docid found in URL " + url);
	}
	var docid = idMatch[1];

	// Fall back to the bare host when the URL has no recognisable origin.
	var originMatch = url.match(/^https?:\/\/[^\/?#]+/);
	var origin = originMatch ? originMatch[0] : 'https://kanyanbao.com';

	var detailink = origin + '/report/detail.htm?docid=' + docid;
	var summarylink = origin + '/report/get_summary.htm?id=' + docid;

	// Intentionally assigns the shared `item`: scrape and get_summary read it.
	item = new Zotero.Item("report");
	item.url = detailink;

	ZU.processDocuments(detailink, function (detailDoc) {
		scrape(detailDoc);
	}, function () {
		ZU.processDocuments(summarylink, function (summaryDoc) {
			get_summary(summaryDoc);
		}, function () {
			item.complete();
		});
	});
}
/**
 * Export every selected item as one Markdown footnote definition line:
 *
 *   [^<title>_<reportNumber>]: [【<date> <institution>】](<detail url>) <title>
 *
 * The detail URL is rebuilt from the item's reportNumber.
 */
function doExport() {
	// Locals are declared explicitly; the loop variable is independent of the
	// `item` used by the web scrapers.
	var item;
	while ((item = Zotero.nextItem())) {
		var url = 'https://kanyanbao.com/report/detail.htm?docid=' + item.reportNumber;
		var label = '【' + item.date + " " + item.institution + "】";
		var anchor = "[^" + item.title + "_" + item.reportNumber + "]: ";
		var line = "[" + label + "](" + url + ") " + item.title + "\n";
		Zotero.write(anchor + line);
	}
}