Java - Make a downloader by Httpget in Java
Which tool can you use to download website resources over HTTP?
今天要來講如何用Java中的httpcore.jar(提供HTTP協議服務的一些class,例如:HTTP Request、HTTP Response)、httpcomponents-client-4.5.2.jar(模擬瀏覽器向伺服器端(server)發出request(請求)的行為)等等的library來擷取網站的資源。首先,既然是一個下載器,當然就要有Graphical User Interface(GUI),也就是輔助使用者操作的一種介面。GUI可以讓使用者不用輸入指令,通常只要點擊滑鼠或用鍵盤做一些簡單的輸入,即可對電腦下達執行某些功能或做某些事的命令,所以我們要先來做這個下載器的介面部分。圖形介面部分我們會使用到awt這個library裡面的一些東西,例如:Panel、Label、TextField、Button、TextArea等元件(Components),以及FlowLayout、GridLayout、BorderLayout等版面配置管理員(Layout Manager)。接下來要做的使用者介面如下圖所示:
如圖所示,會有一個欄位可以輸入網址;Check the link可以測試連結的狀態;接著可以選擇要把url取得的資源下載到哪個資料夾底下;Open link in Browser可以用你預設的瀏覽器開啟你剛剛貼上的那個資源;Copy all info of Headers能將圖中的Header info複製起來,以便貼到你想要的編輯器上;最後可以下載你輸入的url的資源,並選擇檔案的格式(也會自動偵測)。
Graphical User Interface的程式碼如下:
/**
 * AWT window of the downloader.
 *
 * <p>The window lets the user enter a URL, inspect the response status and
 * headers obtained through {@link WiseDownloader}, pick a destination folder,
 * open the URL in the default browser, copy the header text to the clipboard
 * and download the resource to a file.
 */
public class GUIDownloader extends Frame {
    /** Width of the invisible strut that forces the FlowLayout onto a new row. */
    private static final int LINE_BREAK_WIDTH = 2000;
    /** Column at which header values start in the header text area. */
    private static final int HEADER_NAME_WIDTH = 30;

    Panel panel;
    TextField url;
    Button checkLink;
    Panel statusInfo;
    TextField status;
    TextField contentType;
    TextField cdn;
    Panel headerInfo;
    TextArea headers;
    Panel pathPanel;
    TextField path;
    Button pathSelect;
    Panel feather;
    Button openLink;
    Button copyHeadersInfo;
    Button download;
    WiseDownloader downloader;
    Panel downloadSetting;
    TextField fileName;
    TextField fileType;
    Choice fileTypeChoice;
    // The empty entry means "use the detected type"; each type is listed once.
    String[] fileTypeCollection = { "", "html", "png", "jpg", "tif", "gif", "bmp", "flv", "mp3", "mp4" };
    FileDialog fileDialog;
    TextField downloadInfo;

    public static void main(String[] args) {
        new GUIDownloader("Fast Downloader");
    }

    /**
     * Builds the whole window, wires the button handlers and shows the frame.
     *
     * @param title text shown in the window title bar
     */
    public GUIDownloader(String title) {
        // Window title, size and layout.
        setTitle(title);
        setSize(900, 700);
        setLayout(new FlowLayout());

        // The second Font argument is a style, so a style constant is used.
        Font font = new Font(Font.DIALOG, Font.BOLD, 20);
        Font font2 = new Font(Font.DIALOG, Font.BOLD, 18);

        // Row: URL input and the "check" button.
        panel = new Panel();
        add(panel);
        panel.add(new Label("Enter url of the web you want: "));
        url = new TextField(70);
        panel.add(url);
        checkLink = new Button("Check the link");
        panel.add(checkLink);
        addLineBreak();

        // Row: HTTP status, content type and server information.
        statusInfo = new Panel(new GridLayout(3, 2));
        statusInfo.add(new Label("Http Status: "));
        status = new TextField(15);
        status.setEditable(false);
        status.setFont(font);
        statusInfo.add(status);
        add(statusInfo);
        statusInfo.add(new Label("Content-Type: "));
        contentType = new TextField(20);
        contentType.setFont(font);
        contentType.setEditable(false);
        statusInfo.add(contentType);
        statusInfo.add(new Label("CDN Service: "));
        cdn = new TextField(15);
        cdn.setEditable(false);
        cdn.setFont(font);
        statusInfo.add(cdn);
        addLineBreak();

        // Row: response headers.
        headerInfo = new Panel(new BorderLayout());
        headerInfo.add(new Label("Header Info: "), BorderLayout.NORTH);
        headers = new TextArea(10, 60);
        headers.setEditable(false);
        headerInfo.add(headers);
        headers.setFont(font2);
        add(headerInfo);
        addLineBreak();

        // Row: destination folder.
        pathPanel = new Panel();
        pathPanel.add(new Label("Path: "));
        path = new TextField(40);
        path.setEditable(false);
        pathPanel.add(path);
        pathSelect = new Button("...");
        pathPanel.add(pathSelect);
        add(pathPanel);
        addLineBreak();

        // Row: file name and type. GridLayout takes no constraints, so the
        // components are simply added in reading order.
        downloadSetting = new Panel(new GridLayout(4, 2));
        downloadSetting.add(new Label("Download Info: "));
        downloadSetting.add(new Label(""));
        downloadSetting.add(new Label("name of file: "));
        fileName = new TextField(30);
        downloadSetting.add(fileName);
        downloadSetting.add(new Label("type of file: "));
        fileType = new TextField(10);
        downloadSetting.add(fileType);
        downloadSetting.add(new Label("choose the type of file(priority): "));
        fileTypeChoice = new Choice();
        for (String emt : fileTypeCollection) {
            fileTypeChoice.add(emt);
        }
        downloadSetting.add(fileTypeChoice);
        add(downloadSetting);
        addLineBreak();

        // Row: action buttons.
        feather = new Panel(new FlowLayout());
        openLink = new Button("Open link in Browser");
        feather.add(openLink);
        copyHeadersInfo = new Button("Copy all info of Headers");
        feather.add(copyHeadersInfo);
        download = new Button("Download !");
        feather.add(download);
        add(feather);
        addLineBreak();

        // Row: download result message.
        Panel downloadStatus = new Panel();
        downloadInfo = new TextField(40);
        downloadInfo.setEditable(false);
        downloadStatus.add(downloadInfo);
        add(downloadStatus);
        downloadInfo.setFont(font);
        downloadInfo.setForeground(Color.green);

        // Every button reacts to a left click only.
        checkLink.addMouseListener(new MouseAdapter() {
            @Override
            public void mouseClicked(MouseEvent e) {
                if (e.getButton() == MouseEvent.BUTTON1) {
                    checkLinkClicked();
                }
            }
        });
        pathSelect.addMouseListener(new MouseAdapter() {
            @Override
            public void mouseClicked(MouseEvent event) {
                if (event.getButton() == MouseEvent.BUTTON1) {
                    selectPathClicked();
                }
            }
        });
        // Open the link in the browser.
        openLink.addMouseListener(new MouseAdapter() {
            @Override
            public void mouseClicked(MouseEvent event) {
                if (event.getButton() == MouseEvent.BUTTON1) {
                    openLinkClicked();
                }
            }
        });
        // Copy the header text to the system clipboard.
        copyHeadersInfo.addMouseListener(new MouseAdapter() {
            @Override
            public void mouseClicked(MouseEvent event) {
                if (event.getButton() == MouseEvent.BUTTON1) {
                    Clipboard clpbrd = Toolkit.getDefaultToolkit().getSystemClipboard();
                    StringSelection headerSelection = new StringSelection(headers.getText());
                    clpbrd.setContents(headerSelection, null);
                }
            }
        });
        download.addMouseListener(new MouseAdapter() {
            @Override
            public void mouseClicked(MouseEvent event) {
                // Other mouse buttons are ignored instead of wiping the form.
                if (event.getButton() == MouseEvent.BUTTON1) {
                    downloadClicked();
                }
            }
        });

        // Closing the window ends the program.
        addWindowListener(new WindowAdapter() {
            @Override
            public void windowClosing(WindowEvent event) {
                System.exit(0);
            }
        });
        // Make the frame visible.
        setVisible(true);
    }

    /** Adds a very wide invisible component so that FlowLayout wraps to a new row. */
    private void addLineBreak() {
        add(Box.createHorizontalStrut(LINE_BREAK_WIDTH));
    }

    /** Shows a message in the download info field using the given colour. */
    private void showDownloadInfo(String message, Color colour) {
        downloadInfo.setForeground(colour);
        downloadInfo.setText(message);
    }

    /** Requests the entered URL and fills the status, header and file fields. */
    private void checkLinkClicked() {
        String target = url.getText();
        if (target.equals("")) {
            return;
        }
        downloader = new WiseDownloader();
        try {
            downloader.checkLinkStatus(target);
        } catch (RuntimeException e) {
            // The downloader wraps every failure in a RuntimeException; show
            // it to the user instead of letting it escape on the event thread.
            status.setText("N/A");
            status.setForeground(Color.RED);
            headers.setText("Unable to check the link: " + e.getMessage());
            headers.setForeground(Color.RED);
            return;
        }
        String httpStatus = downloader.responseStatusInfo.getHttpStatus();
        if (httpStatus == null || !httpStatus.contains("200")) {
            // Make a failed check visible rather than leaving stale data.
            status.setText(httpStatus == null ? "N/A" : httpStatus);
            status.setForeground(Color.RED);
            return;
        }
        status.setText(httpStatus);
        contentType.setText(downloader.responseStatusInfo.getContentType());
        cdn.setText(downloader.responseStatusInfo.getCdnService());
        status.setForeground(Color.green);
        fileName.setText(downloader.responseStatusInfo.getFileName());
        fileType.setText(downloader.responseStatusInfo.getFileType());

        // One header per line, names padded so the values line up.
        StringBuilder headerText = new StringBuilder();
        for (Header header : downloader.headers) {
            String name = header.getName();
            headerText.append(name);
            for (int i = name.length(); i < HEADER_NAME_WIDTH; i++) {
                headerText.append(' ');
            }
            headerText.append(header.getValue()).append("\n");
        }
        headers.setForeground(Color.BLUE);
        headers.setText(headerText.toString());
        contentType.setForeground(Color.BLACK);
        cdn.setForeground(Color.BLACK);
        fileName.setForeground(Color.BLACK);
        fileType.setForeground(Color.BLACK);
    }

    /** Lets the user pick the destination folder through a FileDialog. */
    private void selectPathClicked() {
        fileDialog = new FileDialog(GUIDownloader.this, "Select Folder", FileDialog.LOAD);
        fileDialog.setFilenameFilter(new FolderFilter());
        // Shown exactly once; the call returns after the dialog is closed.
        fileDialog.setVisible(true);
        String folderSelected = fileDialog.getDirectory();
        if (folderSelected == null) {
            // The user cancelled the dialog.
            return;
        }
        File folder = new File(folderSelected);
        if (folder.exists() && folder.isDirectory()) {
            path.setText(folder.getAbsolutePath());
            path.setForeground(Color.BLACK);
        }
    }

    /** Opens the entered URL in the default browser. */
    private void openLinkClicked() {
        try {
            openWebpage(new URL(url.getText()));
        } catch (MalformedURLException e) {
            showDownloadInfo("Invalid URL: " + url.getText(), Color.RED);
        }
    }

    /** Downloads the entered URL into the selected folder. */
    private void downloadClicked() {
        // The path field may hold an earlier prompt text, so it must name a
        // real directory, not merely be non-empty.
        File folder = new File(path.getText());
        boolean folderReady = folder.isDirectory();
        boolean linkChecked = downloader != null;
        if (!folderReady) {
            path.setText("Please select folder.");
            path.setForeground(Color.RED);
        }
        if (!linkChecked) {
            status.setText("N/A");
            status.setForeground(Color.RED);
            contentType.setText("N/A");
            contentType.setForeground(Color.RED);
            cdn.setText("N/A");
            cdn.setForeground(Color.RED);
            headers.setText("Please check the link first.");
            headers.setForeground(Color.RED);
            fileName.setText("Unknown");
            fileName.setForeground(Color.RED);
            fileType.setText("Unknown");
            fileType.setForeground(Color.RED);
        }
        if (!folderReady || !linkChecked) {
            return;
        }

        // The type chosen in the list wins over the detected type.
        String chosenType = fileTypeChoice.getSelectedItem();
        String extension = chosenType.equals("") ? fileType.getText() : chosenType;
        // File joins the parts with the separator of the running platform.
        String destPath = new File(folder, fileName.getText() + "." + extension).getPath();
        try {
            downloader.downloadFile(url.getText(), fileType.getText(), destPath);
            showDownloadInfo("Download completed !", Color.green);
        } catch (RuntimeException e) {
            showDownloadInfo("Download failed: " + e.getMessage(), Color.RED);
        }
    }

    /**
     * Opens the given URI in the default browser when the desktop supports it.
     *
     * @param uri address to open
     */
    public void openWebpage(URI uri) {
        Desktop desktop = Desktop.isDesktopSupported() ? Desktop.getDesktop() : null;
        if (desktop != null && desktop.isSupported(Desktop.Action.BROWSE)) {
            try {
                desktop.browse(uri);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Converts the URL to a URI and opens it in the default browser.
     *
     * @param url address to open
     */
    public void openWebpage(URL url) {
        try {
            openWebpage(url.toURI());
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
    }
}
// Temporary workaround for FileDialog not being able to select folders.
class FolderFilter implements FilenameFilter {
    /**
     * Accepts an entry only when it is a directory.
     *
     * @param dir  directory that contains the entry
     * @param name name of the entry inside {@code dir}
     * @return {@code true} when {@code dir/name} is an existing directory
     */
    @Override
    public boolean accept(File dir, String name) {
        File candidate = new File(dir, name);
        return candidate.isDirectory();
    }
}
其中使用FileDialog來選取儲存的目標資料夾。因為FileDialog不提供選取資料夾的功能,故使用Stackoverflow上的暫時解法:需要點擊資料夾內的檔案2次後,才能選取到該資料夾。不用JFileChooser,是因為它的介面跟一般作業系統風格差異頗大,而FileDialog跟系統的風格是一樣的,這樣使用者比較不會陌生,結果如下圖:
再來就是重頭戲:處理http request和response的部分了。因為這個下載器是使用HttpGet去下載資料,所以接下來會用HttpGet去跟連結位置做溝通(資料請求),並處理所回傳封包的一些資料,以下就下面幾個函數做說明:
1.checkLinkStatus: 檢查使用者輸入的連結並將回傳封包的資訊做分析與處理(Parse),並將處理好的資料存在Data Transfer Object中,此處是指ResponseStatusInfo
2.downloadFile: 下載url資源的主要程式區段,裡面會呼叫各種小函數處理各種類型的檔案下載
3.traceDomain: 在檢測使用者輸入連結時呼叫,用來取得目標位置(url)的網域,如果回傳的封包存在這項即可取得
4.getExpireDate: 取得cookie有效日期
5.extraxtFileName: 取得url最後的部分(/之後)作為暫時的檔名並顯示在檔名欄位裡,使用者可在檔名欄位依自己喜好修改檔名
6.extractFileTypeByUrl: 由url訊息取得檔案的類型,但有些檔案無法從這地方取得,就必須用下面方法
7.extractFileTypeByContentType: 藉由回傳封包標頭(Header)中的content-type取得檔案類型
8.downloadForHtml: 下載html格式的資源
9.downloadForBinaryFile: 下載二元檔格式的資源,除了文字檔(txt),其他像是圖片、影片等各種檔案(doc、dll、ppt、xsl)幾乎都是以二元檔的形式儲存,各自有各自的編碼,其實所有檔案都是binary,文字檔只是檔案中的binary是文字資料 經過某種編碼的結果而已,電腦只看懂二進位機器碼,看懂指的是電位高低,binary是機器看得懂的碼,或許中間需經過編碼轉換,二進位檔案儲存其來源看你要何種檔案格式,如何轉這已經是程式語言牽涉到system call要了解作業系統,但是總之電腦能看懂。
/**
 * Issues HTTP GET requests with Apache HttpClient, records selected response
 * headers in a {@link ResponseStatusInfo} and saves the response body to a file.
 */
public class WiseDownloader {
    /** Value of the {@code domain} attribute of a Set-Cookie header, up to the next ';'. */
    private static final Pattern COOKIE_DOMAIN = Pattern.compile("domain=([^;]+)", Pattern.CASE_INSENSITIVE);
    /** Value of the {@code expires} attribute of a Set-Cookie header, up to the next ';'. */
    private static final Pattern COOKIE_EXPIRES = Pattern.compile("expires=([^;]+)", Pattern.CASE_INSENSITIVE);
    /** Size of the buffer used to copy the response body to disk. */
    private static final int COPY_BUFFER_SIZE = 8192;

    protected ResponseStatusInfo responseStatusInfo = new ResponseStatusInfo();
    protected Header[] headers;

    /** Returns the part of the URL after the last '/', without query string or fragment. */
    private static String lastPathSegment(String url) {
        int end = url.length();
        int query = url.indexOf('?');
        if (query != -1) {
            end = query;
        }
        int fragment = url.indexOf('#');
        if (fragment != -1 && fragment < end) {
            end = fragment;
        }
        String location = url.substring(0, end);
        return location.substring(location.lastIndexOf('/') + 1);
    }

    /** Returns the last URL segment without its extension, used as the default file name. */
    private String extraxtFileName(String url) {
        String segment = lastPathSegment(url);
        int dot = segment.lastIndexOf('.');
        return (dot == -1) ? segment : segment.substring(0, dot);
    }

    /** Returns the extension of the last URL segment, or {@code null} when it has none. */
    private String extractFileTypeByUrl(String url) {
        String segment = lastPathSegment(url);
        int dot = segment.lastIndexOf('.');
        return (dot != -1) ? segment.substring(dot + 1) : null;
    }

    /** Returns the subtype of a Content-Type value, e.g. "html" for "text/html; charset=UTF-8". */
    private String extractFileTypeByContentType(String contentType) {
        // Parameters start at the FIRST ';'; cut them off before looking for '/'.
        int parameters = contentType.indexOf(';');
        String mediaType = (parameters != -1) ? contentType.substring(0, parameters) : contentType;
        return mediaType.substring(mediaType.indexOf('/') + 1).trim();
    }

    /** Returns the domain attribute of a Set-Cookie value, or {@code null} when absent. */
    private String traceDomain(String cookie) {
        Matcher matcher = COOKIE_DOMAIN.matcher(cookie);
        return matcher.find() ? matcher.group(1).trim() : null;
    }

    /** Returns the expires attribute of a Set-Cookie value, or {@code null} when absent. */
    private String getExpireDate(String cookie) {
        Matcher matcher = COOKIE_EXPIRES.matcher(cookie);
        return matcher.find() ? matcher.group(1).trim() : null;
    }

    /** Builds the GET request with the request headers shared by all calls. */
    private static HttpGet newGet(String url) {
        HttpGet httpget = new HttpGet(url);
        httpget.setHeader("Accept", "json;charset=UTF-8");
        httpget.setHeader("Connection", "keep-alive");
        httpget.setHeader("User-Agent",
                "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36)");
        return httpget;
    }

    /** Prints every response header and copies the interesting ones into responseStatusInfo. */
    private void recordHeaders(Header[] responseHeaders) {
        for (Header header : responseHeaders) {
            String name = header.getName();
            String value = header.getValue();
            System.out.println("Key : " + name + " ,Value : " + value);
            if (value == null) {
                continue;
            }
            // Header names are case-insensitive, so they are compared that way.
            if ("Date".equalsIgnoreCase(name)) {
                try {
                    responseStatusInfo.setDate(value);
                } catch (IllegalArgumentException ignored) {
                    // An unparseable Date header must not abort the request.
                }
            } else if ("Content-Type".equalsIgnoreCase(name)) {
                responseStatusInfo.setContentType(value);
                responseStatusInfo.setFileType(extractFileTypeByContentType(value));
            } else if ("Set-Cookie".equalsIgnoreCase(name)) {
                responseStatusInfo.setCookie(value);
                responseStatusInfo.setDomain(traceDomain(value));
                responseStatusInfo.setExpireDate(getExpireDate(value));
            } else if ("Server".equalsIgnoreCase(name)) {
                responseStatusInfo.setCdnService(value);
            } else if ("CF-RAY".equalsIgnoreCase(name)) {
                responseStatusInfo.setCfRay(value);
            }
        }
    }

    /**
     * Requests the URL and writes the response body to {@code path}.
     *
     * @param url      address of the resource
     * @param fileType "html" saves the body as UTF-8 text, anything else as raw bytes
     * @param path     destination file
     * @throws RuntimeException wrapping any failure of the request or the file write
     */
    protected void downloadFile(String url, String fileType, String path) {
        try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
            responseStatusInfo.setFileName(extraxtFileName(url));
            responseStatusInfo.setFileType(extractFileTypeByUrl(url));
            // Build the GET request.
            HttpGet httpget = newGet(url);
            System.out.println("executing request " + httpget.getURI());
            // Execute the GET request.
            try (CloseableHttpResponse response = httpclient.execute(httpget)) {
                headers = response.getAllHeaders();
                recordHeaders(headers);
                // The entity is the body of the response.
                HttpEntity entity = response.getEntity();
                if (entity == null) {
                    throw new IOException("Response for " + url + " has no body");
                }
                if ("html".equals(fileType)) {
                    downloadForHtml(entity, path);
                } else {
                    downloadForBinaryFile(entity, path);
                }
            }
        } catch (Exception e) {
            throw new RuntimeException("Failed to download " + url, e);
        }
    }

    /**
     * Saves the entity as a UTF-8 text file.
     *
     * @param entity   response body
     * @param destPath destination file
     */
    public void downloadForHtml(HttpEntity entity, String destPath) throws ParseException, IOException {
        // The body is decoded as UTF-8 and Java-style escapes are unescaped
        // so that non-English characters are not garbled.
        // NOTE(review): unescapeJava also rewrites backslash sequences inside
        // inline scripts; confirm that this is wanted for every page.
        String htmlcontent = StringEscapeUtils.unescapeJava(EntityUtils.toString(entity, "UTF-8"));
        // An explicit UTF-8 writer is used; a PrintWriter with the platform
        // charset garbles non-English characters.
        try (Writer out = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(new File(destPath)), "UTF-8"))) {
            out.write(htmlcontent);
        }
    }

    /** Copies the entity to the file in fixed-size chunks and always closes both streams. */
    private void downloadForBinaryFile(HttpEntity entity, String path)
            throws UnsupportedOperationException, IOException {
        // The input is opened first so that a failing getContent() does not
        // create or truncate the destination file.
        try (InputStream in = entity.getContent();
                FileOutputStream output = new FileOutputStream(path)) {
            // A bounded buffer keeps memory use constant for any file size.
            byte[] buffer = new byte[COPY_BUFFER_SIZE];
            int read;
            while ((read = in.read(buffer)) != -1) {
                output.write(buffer, 0, read);
            }
        }
    }

    /**
     * Requests the URL and records its status line and headers in responseStatusInfo.
     *
     * @param url address to check
     * @throws RuntimeException wrapping any failure of the request
     */
    protected void checkLinkStatus(String url) {
        try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
            responseStatusInfo.setFileName(extraxtFileName(url));
            responseStatusInfo.setFileType(extractFileTypeByUrl(url));
            HttpGet httpget = newGet(url);
            System.out.println();
            System.out.println("executing request " + httpget.getURI());
            try (CloseableHttpResponse response = httpclient.execute(httpget)) {
                headers = response.getAllHeaders();
                recordHeaders(headers);
                responseStatusInfo.setHttpStatus(response.getStatusLine());
            }
        } catch (Exception e) {
            throw new RuntimeException("Failed to check " + url, e);
        }
    }
}
其中不使用FileUtils跟IOUtils.copy或IOUtils.copyLarge(for large data, about 2 GB)的原因是因為「FileUtils.readFileToByteArray 會把文件一次性讀入內存中,要下載的文件越大,需要占用的內存也越大,當文件的大小超過JVM和Tomcat的內存配置時,OutOfMemoryError 這個問題就會不可避免的發生。弄清產生該問題的原因之後,解決的方法也很簡單:不利用Commons IO把文件一次性讀入內存,而是利用普通的文件輸出流按字節分段寫入文件,把占用的內存固定在一個指定的範圍內,從根本上避免內存占用過高的問題」。這個問題找了相當久才找到解答,很少網站有提,筆者是搜尋很久後才知道的。
接下來是Data Transfer Object - 資料傳輸物件部分的程式碼:
/**
 * Data transfer object holding what was learned from one HTTP response:
 * status line, selected header values, and the file name and type derived
 * for the download. Accessors are grouped per field, in field order.
 */
public class ResponseStatusInfo {
    private Date date;
    private String domain;
    private String httpStatus;
    private String cdnService;
    private String cfRay;
    private String cookie;
    private String expireDate;
    private String contentType;
    private String responseStatus;
    private String fileName;
    private String fileType;

    /** @return the stored date, or {@code null} when none was set */
    public Date getDate() {
        return date;
    }

    /**
     * Parses the text with the legacy {@code Date(String)} constructor and stores the result.
     *
     * @param date date text to parse
     */
    public void setDate(String date) {
        // NOTE(review): Date(String) is deprecated; kept so the accepted formats stay the same.
        this.date = new Date(date);
    }

    /** @return the stored domain, or {@code null} when none was set */
    public String getDomain() {
        return domain;
    }

    /** @param domain domain text to store */
    public void setDomain(String domain) {
        this.domain = domain;
    }

    /** @return the status line as text, or {@code null} when none was set */
    public String getHttpStatus() {
        return httpStatus;
    }

    /** @param statusLine status line whose {@code toString()} text is stored */
    public void setHttpStatus(StatusLine statusLine) {
        this.httpStatus = statusLine.toString();
    }

    /** @return the stored CDN service text, or {@code null} when none was set */
    public String getCdnService() {
        return cdnService;
    }

    /** @param cdnService CDN service text to store */
    public void setCdnService(String cdnService) {
        this.cdnService = cdnService;
    }

    /** @return the stored CF-RAY text, or {@code null} when none was set */
    public String getCfRay() {
        return cfRay;
    }

    /** @param cfRay CF-RAY text to store */
    public void setCfRay(String cfRay) {
        this.cfRay = cfRay;
    }

    /** @return the stored cookie text, or {@code null} when none was set */
    public String getCookie() {
        return cookie;
    }

    /** @param cookie cookie text to store */
    public void setCookie(String cookie) {
        this.cookie = cookie;
    }

    /** @return the stored expiry text, or {@code null} when none was set */
    public String getExpireDate() {
        return expireDate;
    }

    /** @param expireDate expiry text to store */
    public void setExpireDate(String expireDate) {
        this.expireDate = expireDate;
    }

    /** @return the stored content type, or {@code null} when none was set */
    public String getContentType() {
        return contentType;
    }

    /** @param contentType content type text to store */
    public void setContentType(String contentType) {
        this.contentType = contentType;
    }

    /** @return the stored response status, or {@code null} when none was set */
    public String getResponseStatus() {
        return responseStatus;
    }

    /** @param responseStatus response status text to store */
    public void setResponseStatus(String responseStatus) {
        this.responseStatus = responseStatus;
    }

    /** @return the stored file name, or {@code null} when none was set */
    public String getFileName() {
        return fileName;
    }

    /** @param fileName file name to store */
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    /** @return the stored file type, or {@code null} when none was set */
    public String getFileType() {
        return fileType;
    }

    /** @param fileType file type to store */
    public void setFileType(String fileType) {
        this.fileType = fileType;
    }
}
而做這個下載器需要下列的library(jar):
程式執行畫面如下:
這回就到這邊結束,謝謝大家
Keyword:
String to file
byte[] to file in Java
Inputstream to file
IoUtils
Copying Text to the Clipboard using Java
How to write a UTF-8 file with Java?
**P.S. / Reference: Java Programming Tutorial
Programming Graphical User Interface (GUI)
利用SpringMVC下载大文件