Java - Make a downloader by Httpget in Java

What tools can you use to download website resources over HTTP?

  今天要來講如何用Java中的httpcore.jar(提供Http協議服務的一些class,ex:HTTP Request、HTTP Response)、httpcomponents-client-4.5.2.jar(模擬瀏覽器向伺服器端(server)發出request(請求)的行為)等等的library來擷取網站的資源。首先,既然是一個下載器,當然就要有Graphical User Interface(GUI)-輔助使用者操作的一種介面。GUI可以讓使用者不用輸入指令,通常只要藉由點擊滑鼠或鍵盤的一些簡單輸入,即可對電腦下達執行某些功能或做某些事的命令,所以我們要先來做這個下載器的介面部分。圖形介面部分我們會使用到awt這個library裡面的一些東西,例如:Panel、Label、TextField、Button、TextArea等元件(Components),以及FlowLayout、GridLayout、BorderLayout等版面配置管理員(Layout Manager),如下圖所示

  而接下來要做的使用者介面如下:


  如圖所示,會有一個欄位可以輸入網址,然後Check the link 可以測試連結的狀態,接著可以選擇要把url取得的資源下載到哪個資料夾底下、Open link in Browser可以用你預設的瀏覽器開啟你剛剛貼的那個資源、Copy all info of Headers能將圖中的Header info複製到剪貼簿,以便貼到你想要的編輯器上,以及可以下載你輸入的url的資源,並選擇檔案的格式(也會自動偵測)。

  Graphical User Interface的程式碼如下:
/**
 * AWT front end for the downloader.
 *
 * <p>The window lets the user type a URL, probe it ("Check the link"), pick a
 * destination folder, open the URL in the default browser, copy the response
 * headers to the clipboard and finally download the resource. All HTTP work is
 * delegated to {@link WiseDownloader}; this class only wires widgets to it.
 */
public class GUIDownloader extends Frame {
    Panel panel;
    TextField url;
    Button checkLink;
    Panel statusInfo;
    TextField status;
    TextField contentType;
    TextField cdn;
    Panel headerInfo;
    TextArea headers;
    Panel pathPanel;
    TextField path;
    Button pathSelect;
    Panel feather;
    Button openLink;
    Button copyHeadersInfo;
    Button download;
    /** Stays {@code null} until a link check has completed without an exception. */
    WiseDownloader downloader;
    Panel downloadSetting;
    TextField fileName;
    TextField fileType;
    Choice fileTypeChoice;
    /** Entries of the file-type drop-down; the empty entry means "use the detected type". */
    String[] fileTypeCollection = { "", "html", "png", "jpg", "tif", "gif", "bmp", "flv", "mp3", "mp4" };
    FileDialog fileDialog;
    TextField downloadInfo;

    public static void main(String[] args) {
        new GUIDownloader("Fast Downloader");
    }

    /**
     * Builds the whole window, registers the listeners and shows the frame.
     *
     * @param title text shown in the title bar
     */
    public GUIDownloader(String title) {
        setTitle(title);
        setSize(900, 700);
        setLayout(new FlowLayout());

        // Font.BOLD has the same numeric value as Font.TYPE1_FONT (which is a font
        // *type*, not a style), so rendering is unchanged but the intent is explicit.
        Font font = new Font(Font.DIALOG, Font.BOLD, 20);
        Font font2 = new Font(Font.DIALOG, Font.BOLD, 18);

        // Row 1: URL entry and the "check" button.
        panel = new Panel();
        add(panel);
        panel.add(new Label("Enter url of the web you want: "));
        url = new TextField(70);
        panel.add(url);
        checkLink = new Button("Check the link");
        panel.add(checkLink);
        addRowBreak();

        // Row 2: read-only summary of the last link check.
        statusInfo = new Panel(new GridLayout(3, 2));
        statusInfo.add(new Label("Http Status: "));
        status = new TextField(15);
        status.setEditable(false);
        status.setFont(font);
        statusInfo.add(status);
        add(statusInfo);
        statusInfo.add(new Label("Content-Type: "));
        contentType = new TextField(20);
        contentType.setFont(font);
        contentType.setEditable(false);
        statusInfo.add(contentType);
        statusInfo.add(new Label("CDN Service: "));
        cdn = new TextField(15);
        cdn.setEditable(false);
        cdn.setFont(font);
        statusInfo.add(cdn);
        addRowBreak();

        // Row 3: raw response headers.
        headerInfo = new Panel(new BorderLayout());
        headerInfo.add(new Label("Header Info: "), BorderLayout.NORTH);
        headers = new TextArea(10, 60);
        headers.setEditable(false);
        headerInfo.add(headers);
        headers.setFont(font2);
        add(headerInfo);
        addRowBreak();

        // Row 4: destination folder.
        pathPanel = new Panel();
        pathPanel.add(new Label("Path: "));
        path = new TextField(40);
        path.setEditable(false);
        pathPanel.add(path);
        pathSelect = new Button("...");
        pathPanel.add(pathSelect);
        add(pathPanel);
        addRowBreak();

        // Row 5: file name / type. GridLayout ignores constraint arguments, so the
        // components are simply added in reading order.
        downloadSetting = new Panel(new GridLayout(4, 2));
        downloadSetting.add(new Label("Download Info: "));
        downloadSetting.add(new Label(""));
        downloadSetting.add(new Label("name of file: "));
        fileName = new TextField(30);
        downloadSetting.add(fileName);
        downloadSetting.add(new Label("type  of  file: "));
        fileType = new TextField(10);
        downloadSetting.add(fileType);
        downloadSetting.add(new Label("choose the type  of  file(priority): "));
        fileTypeChoice = new Choice();
        for (String emt : fileTypeCollection) {
            fileTypeChoice.add(emt);
        }
        downloadSetting.add(fileTypeChoice);
        add(downloadSetting);
        addRowBreak();

        // Row 6: action buttons.
        feather = new Panel(new FlowLayout());
        openLink = new Button("Open link in Browser");
        feather.add(openLink);
        copyHeadersInfo = new Button("Copy all info of Headers");
        feather.add(copyHeadersInfo);
        download = new Button("Download !");
        feather.add(download);
        add(feather);
        addRowBreak();

        // Row 7: download result message.
        Panel downloadStatus = new Panel();
        downloadInfo = new TextField(40);
        downloadInfo.setEditable(false);
        downloadStatus.add(downloadInfo);
        add(downloadStatus);
        downloadInfo.setFont(font);
        downloadInfo.setForeground(Color.green);

        checkLink.addMouseListener(new MouseAdapter() {
            @Override
            public void mouseClicked(MouseEvent e) {
                if (e.getButton() == MouseEvent.BUTTON1) {
                    handleCheckLink();
                }
            }
        });

        pathSelect.addMouseListener(new MouseAdapter() {
            @Override
            public void mouseClicked(MouseEvent event) {
                if (event.getButton() == MouseEvent.BUTTON1) {
                    handleSelectFolder();
                }
            }
        });

        // Open the link in the default browser.
        openLink.addMouseListener(new MouseAdapter() {
            @Override
            public void mouseClicked(MouseEvent event) {
                if (event.getButton() == MouseEvent.BUTTON1) {
                    try {
                        openWebpage(new URL(url.getText()));
                    } catch (MalformedURLException e) {
                        // Tell the user instead of only dumping a stack trace.
                        downloadInfo.setForeground(Color.RED);
                        downloadInfo.setText("Invalid URL: " + e.getMessage());
                    }
                }
            }
        });

        // Copy the header text to the system clipboard.
        copyHeadersInfo.addMouseListener(new MouseAdapter() {
            @Override
            public void mouseClicked(MouseEvent event) {
                if (event.getButton() == MouseEvent.BUTTON1) {
                    Clipboard clpbrd = Toolkit.getDefaultToolkit().getSystemClipboard();
                    StringSelection headerSelection = new StringSelection(headers.getText());
                    clpbrd.setContents(headerSelection, null);
                }
            }
        });

        download.addMouseListener(new MouseAdapter() {
            @Override
            public void mouseClicked(MouseEvent event) {
                // Only the primary button triggers anything; other buttons used to
                // fall into the "not ready" branch and wipe every field.
                if (event.getButton() == MouseEvent.BUTTON1) {
                    handleDownload();
                }
            }
        });

        // Terminate the JVM when the window is closed.
        addWindowListener(new WindowAdapter() {
            @Override
            public void windowClosing(WindowEvent event) {
                System.exit(0);
            }
        });

        setVisible(true);
    }

    /** Adds a very wide invisible strut so that FlowLayout wraps to a new row. */
    private void addRowBreak() {
        add(Box.createHorizontalStrut(2000));
    }

    /** Probes the typed URL and mirrors the result into the status widgets. */
    private void handleCheckLink() {
        String target = url.getText();
        if (target.equals("")) {
            return;
        }
        WiseDownloader checker = new WiseDownloader();
        try {
            checker.checkLinkStatus(target);
        } catch (RuntimeException e) {
            // checkLinkStatus wraps every failure in a RuntimeException; keep it off
            // the event thread and show it to the user instead.
            downloader = null;
            status.setForeground(Color.RED);
            status.setText("N/A");
            headers.setForeground(Color.RED);
            headers.setText("Link check failed: " + e.getMessage());
            return;
        }
        downloader = checker;

        String httpStatus = downloader.responseStatusInfo.getHttpStatus();
        if (httpStatus == null || httpStatus.indexOf("200") == -1) {
            // Not a 200 response: show what the server said, but fill in nothing else.
            status.setForeground(Color.RED);
            status.setText(httpStatus == null ? "N/A" : httpStatus);
            return;
        }

        status.setText(httpStatus);
        contentType.setText(downloader.responseStatusInfo.getContentType());
        cdn.setText(downloader.responseStatusInfo.getCdnService());
        status.setForeground(Color.green);
        fileName.setText(downloader.responseStatusInfo.getFileName());
        fileType.setText(downloader.responseStatusInfo.getFileType());

        // One header per line, names padded to 30 columns.
        StringBuilder headerText = new StringBuilder();
        for (Header header : downloader.headers) {
            String name = header.getName();
            headerText.append(name);
            for (int i = name.length(); i < 30; i++) {
                headerText.append(' ');
            }
            headerText.append(header.getValue()).append("\n");
        }
        headers.setForeground(Color.BLUE);
        headers.setText(headerText.toString());
        contentType.setForeground(Color.BLACK);
        cdn.setForeground(Color.BLACK);
        fileName.setForeground(Color.BLACK);
        fileType.setForeground(Color.BLACK);
    }

    /** Lets the user pick the destination folder through the native file dialog. */
    private void handleSelectFolder() {
        fileDialog = new FileDialog(GUIDownloader.this, "Select Folder", FileDialog.LOAD);
        fileDialog.setFilenameFilter(new FolderFilter());
        // A single setVisible(true) is enough; also calling the deprecated show()
        // opened the dialog twice in a row.
        fileDialog.setVisible(true);
        String folderSelected = fileDialog.getDirectory();
        if (folderSelected == null) {
            // Dialog was cancelled.
            return;
        }
        File folder = new File(folderSelected);
        if (folder.exists() && folder.isDirectory()) {
            path.setText(folder.getAbsolutePath());
            path.setForeground(Color.BLACK);
        }
    }

    /** Downloads the resource into the chosen folder, or explains what is still missing. */
    private void handleDownload() {
        boolean linkChecked = downloader != null;
        File folder = new File(path.getText());
        // The path field may contain the "Please select folder." hint, so test for a
        // real directory rather than for non-empty text.
        boolean folderChosen = !path.getText().equals("") && folder.isDirectory();
        if (!linkChecked || !folderChosen) {
            showNotReady(linkChecked, folderChosen);
            return;
        }

        // The drop-down has priority over the detected type, both for the file
        // extension and for choosing the html/binary download path.
        String chosen = fileTypeChoice.getSelectedItem();
        String extension = (chosen != null && !chosen.equals("")) ? chosen : fileType.getText();
        String leaf = extension.equals("") ? fileName.getText() : fileName.getText() + "." + extension;
        // File(parent, child) inserts the platform's own separator.
        File destination = new File(folder, leaf);

        try {
            downloader.downloadFile(url.getText(), extension, destination.getPath());
            downloadInfo.setForeground(Color.green);
            downloadInfo.setText("Download completed !");
        } catch (RuntimeException e) {
            downloadInfo.setForeground(Color.RED);
            downloadInfo.setText("Download failed: " + e.getMessage());
        }
    }

    /** Shows red hints for whichever prerequisite of a download is not met yet. */
    private void showNotReady(boolean linkChecked, boolean folderChosen) {
        if (!linkChecked) {
            status.setText("N/A");
            status.setForeground(Color.RED);
            contentType.setText("N/A");
            contentType.setForeground(Color.RED);
            cdn.setText("N/A");
            cdn.setForeground(Color.RED);
            headers.setText("Please check the link first.");
            headers.setForeground(Color.RED);
            fileName.setText("Unknown");
            fileName.setForeground(Color.RED);
            fileType.setText("Unknown");
            fileType.setForeground(Color.RED);
        }
        if (!folderChosen) {
            path.setText("Please select folder.");
            path.setForeground(Color.RED);
        }
    }

    /**
     * Opens the given URI in the default browser; does nothing when the platform
     * has no desktop/browse support.
     *
     * @param uri address to open
     */
    public void openWebpage(URI uri) {
        Desktop desktop = Desktop.isDesktopSupported() ? Desktop.getDesktop() : null;
        if (desktop != null && desktop.isSupported(Desktop.Action.BROWSE)) {
            try {
                desktop.browse(uri);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Converts the URL to a URI and opens it in the default browser.
     *
     * @param url address to open
     */
    public void openWebpage(URL url) {
        try {
            openWebpage(url.toURI());
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
    }
}
// Stop-gap for FileDialog not being able to select folders: only directories pass the filter.
class FolderFilter implements FilenameFilter {
    /**
     * Accepts an entry only when it is a directory.
     *
     * @param dir  directory that contains the entry
     * @param name name of the entry inside {@code dir}
     * @return {@code true} if {@code dir/name} is an existing directory
     */
    @Override
    public boolean accept(File dir, String name) {
        File candidate = new File(dir, name);
        return candidate.isDirectory();
    }
}

  其中使用FileDialog來選取儲存的目標資料夾。因為FileDialog不提供選取資料夾的功能,故使用Stackoverflow上的暫時解法,需要點擊資料夾內的檔案2次後,才能選取到該資料夾。不用JFileChooser,是因為它的介面跟一般作業系統風格差異頗大,而FileDialog跟系統的風格是一樣的,這樣使用者比較不會陌生,結果如下圖


  再來就是重頭戲:處理http request和response的部分了。因為這個下載器是使用HttpGet去下載資料,所以接下來會用HttpGet去跟連結位置做溝通(資料請求),並處理所回傳封包的一些資料,以下就下面幾個函數做說明:

1.checkLinkStatus: 檢查使用者輸入的連結並將回傳封包的資訊做分析與處理(Parse),並將處理好的資料存在Data Transfer Object中,此處是指ResponseStatusInfo
2.downloadFile: 下載url資源的主要程式區段,裡面會呼叫各種小函數處理各種類型的檔案下載
3.traceDomain: 在檢測使用者輸入連結時呼叫,用來取得目標位置(url)的網域,如果回傳的封包存在這項即可取得
4.getExpireDate: 取得cookie有效日期
5.extraxtFileName: 取得url最後的部分(/之後)作為暫時的檔名並顯示在檔名欄位裡,使用者可在檔名欄位依自己喜好修改檔名
6.extractFileTypeByUrl: 由url訊息取得檔案的類型,但有些檔案無法從這地方取得,就必須用下面方法
7.extractFileTypeByContentType: 藉由回傳封包標頭(Header)中的content-type取得檔案類型
8.downloadForHtml: 下載html格式的資源
9.downloadForBinaryFile: 下載二元檔格式的資源,除了文字檔(txt),其他像是圖片、影片等各種檔案(doc、dll、ppt、xsl)幾乎都是以二元檔的形式儲存,各自有各自的編碼,其實所有檔案都是binary,文字檔只是檔案中的binary是文字資料 經過某種編碼的結果而已,電腦只看懂二進位機器碼,看懂指的是電位高低,binary是機器看得懂的碼,或許中間需經過編碼轉換,二進位檔案儲存其來源看你要何種檔案格式,如何轉這已經是程式語言牽涉到system call要了解作業系統,但是總之電腦能看懂。
public class WiseDownloader {
    protected ResponseStatusInfo responseStatusInfo = new ResponseStatusInfo();
    protected Header[] headers;


    private String extraxtFileName(String url) {
        int start = url.lastIndexOf('/') + 1;
        return (url.substring(start).indexOf(".") == -1) ? url.substring(start)
                : url.substring(start, url.lastIndexOf("."));
    }

    private String extractFileTypeByUrl(String url) {
        int start = url.lastIndexOf('/') + 1;
        String fileName = url.substring(start);
        return (fileName.lastIndexOf(".") != -1) ? fileName.substring(fileName.lastIndexOf(".") + 1) : null;
    }

    private String extractFileTypeByContentType(String contentType) {
        int start = contentType.indexOf("/") + 1;
        int end = contentType.lastIndexOf(";");
        return (end != -1) ? contentType.substring(start, end) : contentType.substring(start);
    }

    private String traceDomain(String cookie) {
        Pattern pattern = Pattern.compile("domain=(.+);");
        Matcher matcher = pattern.matcher(cookie);
        String domain = null;
        if (matcher.find()) {
            domain = matcher.group(1);
        }
        return domain;
    }

    private String getExpireDate(String cookie) {
        Pattern pattern = Pattern.compile("expires=(.+); path");
        Matcher matcher = pattern.matcher(cookie);
        String expire = null;
        if (matcher.find()) {
            expire = matcher.group(1);
        }
        return expire;
    }

    protected void downloadFile(String url, String fileType, String path) {
        try (CloseableHttpClient httpclient = HttpClients.createDefault();) {
            responseStatusInfo.setFileName(extraxtFileName(url));
            responseStatusInfo.setFileType(extractFileTypeByUrl(url));
            // 建立httpget
            HttpGet httpget = new HttpGet(url);
            httpget.setHeader("Accept", "json;charset=UTF-8");
            httpget.setHeader("Connection", "keep-alive");
            httpget.setHeader("User-Agent",
                    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36)");
            System.out.println("executing request " + httpget.getURI());
            // 執行get請求
            try (CloseableHttpResponse response = httpclient.execute(httpget);) {
                headers = response.getAllHeaders();
                for (Header header : headers) {
                    System.out.println("Key : " + header.getName() + " ,Value : " + header.getValue());
                    switch (header.getName()) {
                    case "Date":
                        responseStatusInfo.setDate(header.getValue());
                        break;
                    case "Content-Type":
                        responseStatusInfo.setContentType(header.getValue());
                        responseStatusInfo.setFileType(extractFileTypeByContentType(header.getValue()));
                        break;
                    case "Transfer-Encoding":
                        break;
                    case "Connection":
                        break;
                    case "Set-Cookie":
                        responseStatusInfo.setCookie(header.getValue());
                        responseStatusInfo.setDomain(traceDomain(header.getValue()));
                        responseStatusInfo.setExpireDate(getExpireDate(header.getValue()));
                        break;
                    case "Server":
                        responseStatusInfo.setCdnService(header.getValue());
                        break;
                    case "CF-RAY":
                        responseStatusInfo.setCfRay(header.getValue());
                        break;

                    }
                }
                // 由Response取得entity (回覆中的body部分)
                HttpEntity entity = response.getEntity();
                // String filePath =
                // "C:\\Users\\Aingel\\Documents\\fileDownloadTest\\test.png" ;
                InputStream in = entity.getContent();
                if (fileType.equals("html")) {
                    downloadForHtml(entity, path);
                } else {
                    downloadForBinaryFile(entity, path);
                }
                //用Byte陣列將資料Buffer起來並存入檔案中
                // while ((len = in.read(byteArrayOutputStream.toByteArray()))
                // != -1) {
                // output.write(byteArrayOutputStream.toByteArray(), 0, len);
                // }
                // IOUtils.copyLarge(entity.getContent(), output);

                // responseStatusInfo.setHttpStatus(response.getStatusLine());
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    public void downloadForHtml(HttpEntity entity, String destPath) throws ParseException, IOException {
        // StringEscapeUtils.unescapeJava(str), EntityUtils.toString(entity,
        // encoding) 用來跳脫(前者)並轉換(後者)字元以免中文字亂碼
        String htmlcontent = StringEscapeUtils.unescapeJava(EntityUtils.toString(entity, "UTF-8"));
        //不要用此種檔案寫入方式,如有非英文字元時,會亂碼
        // try(PrintWriter out = new PrintWriter(new File(destPath));)
        // {
        // out.write(htmlcontent);
        // }

        try (Writer out = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(new File(destPath)), "UTF-8"));) {
            out.write(htmlcontent);
        }
    }

    private void downloadForBinaryFile(HttpEntity entity, String path)
            throws UnsupportedOperationException, IOException {
        FileOutputStream output = new FileOutputStream(path);
        //將InputStream 先轉成byte array 以避免檔案過大且連線中斷造成下載檔案失敗
        //轉成byte陣列之後再將資料寫入檔案中
        byte[] bytes = IOUtils.toByteArray(entity.getContent());
        output.write(bytes);
        output.close();
    }

    protected void checkLinkStatus(String url) {
        try (CloseableHttpClient httpclient = HttpClients.createDefault();) {
            responseStatusInfo.setFileName(extraxtFileName(url));
            responseStatusInfo.setFileType(extractFileTypeByUrl(url));
            HttpGet httpget = new HttpGet(url);
            httpget.setHeader("Accept", "json;charset=UTF-8");
            httpget.setHeader("Connection", "keep-alive");
            httpget.setHeader("User-Agent",
                    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36)");
            System.out.println();
            System.out.println("executing request " + httpget.getURI());
            try (CloseableHttpResponse response = httpclient.execute(httpget);) {
                headers = response.getAllHeaders();
                for (Header header : headers) {
                    System.out.println("Key : " + header.getName() + " ,Value : " + header.getValue());
                    switch (header.getName()) {
                    case "Date":
                        responseStatusInfo.setDate(header.getValue());
                        break;
                    case "Content-Type":
                        responseStatusInfo.setContentType(header.getValue());
                        responseStatusInfo.setFileType(extractFileTypeByContentType(header.getValue()));
                        break;
                    case "Transfer-Encoding":
                        break;
                    case "Connection":
                        break;
                    case "Set-Cookie":
                        responseStatusInfo.setCookie(header.getValue());
                        responseStatusInfo.setDomain(traceDomain(header.getValue()));
                        responseStatusInfo.setExpireDate(getExpireDate(header.getValue()));
                        break;
                    case "Server":
                        responseStatusInfo.setCdnService(header.getValue());
                        break;
                    case "CF-RAY":
                        responseStatusInfo.setCfRay(header.getValue());
                        break;

                    }
                }
                HttpEntity entity = response.getEntity();
                responseStatusInfo.setHttpStatus(response.getStatusLine());
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}

  其中不使用FileUtils跟IOUtils.copy或IOUtils.copyLarge(for large data about 2G)的原因是因為「FileUtils.readFileToByteArray 會把文件一次性讀入內存中,要下載的文件越大,需要占用的內存也越大,當文件的大小超過JVM和Tomcat的內存配置時,OutOfMemoryError 這個問題就會不可避免的發生。弄清產生該問題的原因之後,解決的方法也很簡單:不利用Commons IO把文件一次性讀入內存,而是利用普通的文件輸出流按字節分段寫入文件,把占用的內存固定在一個指定的範圍內,從根本上避免內存占用過高的問題」。這個問題找了相當久才找到解答,很少網站有提,筆者是搜尋很久後才知道的

  接下來是Data Transfer Object - 資料傳輸物件部分的程式碼:
/**
 * Data transfer object holding what was learned about one HTTP response:
 * status line, selected header values and the file name/type derived from them.
 * Every field is {@code null} until its setter has been called.
 */
public class ResponseStatusInfo {
    /**
     * Date layouts tried in order by {@link #setDate(String)}: RFC 1123,
     * RFC 850 and ANSI C asctime, the three forms an HTTP date may take.
     */
    private static final String[] HTTP_DATE_PATTERNS = {
            "EEE, dd MMM yyyy HH:mm:ss zzz",
            "EEEE, dd-MMM-yy HH:mm:ss zzz",
            "EEE MMM d HH:mm:ss yyyy" };

    private Date date;
    private String domain;
    private String httpStatus;
    private String cdnService;
    private String cfRay;
    private String cookie;
    private String expireDate;
    private String contentType;
    private String responseStatus;
    private String fileName;
    private String fileType;

    public String getFileType() {
        return fileType;
    }

    public void setFileType(String fileType) {
        this.fileType = fileType;
    }

    public String getHttpStatus() {
        return httpStatus;
    }

    /** Stores the textual form of the status line. */
    public void setHttpStatus(StatusLine statusLine) {
        this.httpStatus = statusLine.toString();
    }

    public String getCdnService() {
        return cdnService;
    }

    public void setCdnService(String cdnService) {
        this.cdnService = cdnService;
    }

    public String getCfRay() {
        return cfRay;
    }

    public void setCfRay(String cfRay) {
        this.cfRay = cfRay;
    }

    /**
     * @return a copy of the parsed Date header, or {@code null} when none was
     *         set or it could not be parsed
     */
    public Date getDate() {
        // Date is mutable; hand out a copy so callers cannot alter this object.
        return (date == null) ? null : new Date(date.getTime());
    }

    /**
     * Parses an HTTP date string and stores the result.
     *
     * <p>A {@code null} or unparseable value clears the date instead of throwing,
     * so a malformed Date header does not abort processing of the response.
     *
     * @param date header value, e.g. {@code "Tue, 15 Nov 1994 08:12:31 GMT"}
     */
    public void setDate(String date) {
        this.date = parseHttpDate(date);
    }

    /** Tries each known HTTP date layout; returns {@code null} when none matches. */
    private static Date parseHttpDate(String text) {
        if (text == null) {
            return null;
        }
        String trimmed = text.trim();
        for (String pattern : HTTP_DATE_PATTERNS) {
            // SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
            java.text.SimpleDateFormat format = new java.text.SimpleDateFormat(pattern, java.util.Locale.US);
            // The asctime layout carries no zone; HTTP dates are expressed in GMT.
            format.setTimeZone(java.util.TimeZone.getTimeZone("GMT"));
            // The ParsePosition overload returns null on failure instead of throwing.
            Date parsed = format.parse(trimmed, new java.text.ParsePosition(0));
            if (parsed != null) {
                return parsed;
            }
        }
        return null;
    }

    public String getDomain() {
        return domain;
    }

    public void setDomain(String domain) {
        this.domain = domain;
    }

    public String getCookie() {
        return cookie;
    }

    public void setCookie(String cookie) {
        this.cookie = cookie;
    }

    public String getExpireDate() {
        return expireDate;
    }

    public void setExpireDate(String expireDate) {
        this.expireDate = expireDate;
    }

    public String getContentType() {
        return contentType;
    }

    public void setContentType(String contentType) {
        this.contentType = contentType;
    }

    public String getResponseStatus() {
        return responseStatus;
    }

    public void setResponseStatus(String responseStatus) {
        this.responseStatus = responseStatus;
    }

    public String getFileName() {
        return fileName;
    }

    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

}

  而做這個下載器需要下列的library(jar):


  程式執行畫面如下:


  這回就到這邊結束,謝謝大家

下載器程式(jar檔)

Keyword:
String to file
byte[] to file in Java
Inputstream to file
IoUtils
Copying Text to the Clipboard using Java
How to write a UTF-8 file with Java?



**P.S. / Reference:  Java Programming Tutorial Programming Graphical User Interface (GUI)
           利用SpringMVC下载大文件

results matching ""

    No results matching ""