/**
* MediaSniper 3.0 (2008-08-02)
* Copyright 2007 - 2008 Zach Scrivena
* zachscrivena@gmail.com
* http://mediasniper.patternforge.net/
*
* Simple program for downloading media files from popular websites.
*
* TERMS AND CONDITIONS:
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.freeshell.zs.mediasniper;
import java.awt.Image;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.awt.event.WindowAdapter;
import java.awt.event.WindowEvent;
import java.io.File;
import java.lang.String;
import java.net.URL;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JOptionPane;
import javax.swing.JTable;
import javax.swing.SwingUtilities;
import javax.swing.event.ListSelectionEvent;
import javax.swing.event.ListSelectionListener;
import javax.swing.table.AbstractTableModel;
import javax.swing.table.TableColumnModel;
import javax.swing.text.JTextComponent;
import org.freeshell.zs.common.Debug;
import org.freeshell.zs.common.Downloader;
import org.freeshell.zs.common.SwingManipulator;
/**
* Parse a webpage for media file downloads.
*/
class WebpageParser
extends JFrame
implements Runnable, ListSelectionListener
{
/** polling interval used by the waiting loops in this class, in milliseconds */
private static final long REFRESH_INTERVAL_MILLISECONDS = 100L;
/** parent MediaSniper object */
private final MediaSniper parent;
/** webpage URL to be parsed (the starting point of the parse) */
private final URL url;
/** table model for table of downloads; backed directly by {@link #candidates} */
private final CandidatesTableModel candidatesTableModel = new CandidatesTableModel();
/** candidate media file downloads; every access in this class is made while holding synchronized (candidates) */
private final List<Download> candidates = new ArrayList<Download>();
/** is the parser cancelled? Set to true by closeParser() and polled by the parsing and retrieval threads */
volatile private boolean cancelled = false;
/** has the parsing completed? Set to true at the end of run(), whether or not parsing succeeded */
volatile private boolean completed = false;
/**
* downloader for retrieving webpages; run() stores a downloader here for each
* page, and the retrieval thread started in the constructor runs it and then
* resets this field to null
*/
volatile private Downloader downloader = null;
/**
 * Constructor.
 * Registers this parser with the parent, builds and configures the form,
 * and starts the background thread that retrieves webpages on behalf of
 * {@link #run()}.
 *
 * @param parent
 *     parent MediaSniper object
 * @param url
 *     webpage URL to be parsed
 * @throws java.lang.UnsupportedOperationException
 *     if no definitions can be found for the specified URL
 */
WebpageParser(
        final MediaSniper parent,
        final URL url)
        throws UnsupportedOperationException
{
    this.parent = parent;
    this.url = url;
    parent.addParser(this);

    /******************************
     * INITIALIZE FORM COMPONENTS *
     ******************************/

    initComponents();

    /*****************************
     * CONFIGURE FORM COMPONENTS *
     *****************************/

    setTitle("Webpage Parser - " + parent.getTitle());

    /* inherit "always on top" behavior of parent */
    try
    {
        setAlwaysOnTop(parent.isAlwaysOnTop());
    }
    catch (Exception ignored)
    {
        /* best effort only: the form is still usable without this setting */
    }

    /* inherit program icon of parent */
    final List<Image> icons = parent.getIconImages();

    if (!icons.isEmpty())
    {
        setIconImage(icons.get(0));
    }

    addWindowListener(new WindowAdapter()
    {
        @Override
        public void windowIconified(WindowEvent e)
        {
            minimizeParser();
        }

        @Override
        public void windowDeiconified(WindowEvent e)
        {
            restoreParser();
        }

        @Override
        public void windowClosing(WindowEvent e)
        {
            cancelWithoutAddingCandidates();
        }
    });

    /* webpage URL text area */
    urlText.setText(url.toString());
    SwingManipulator.addStandardEditingPopupMenu(new JTextComponent[] {urlText});

    /* progress bar */
    SwingManipulator.updateProgressBar(progress, "Waiting to start", -1);

    /* table of candidate media file downloads */
    candidatesTable.setAutoResizeMode(JTable.AUTO_RESIZE_SUBSEQUENT_COLUMNS);
    final TableColumnModel colModel = candidatesTable.getColumnModel();
    colModel.getColumn(0).setMinWidth(0);
    colModel.getColumn(1).setMinWidth(0);
    colModel.getColumn(0).setPreferredWidth(50);
    colModel.getColumn(1).setPreferredWidth(80);
    candidatesTable.setToolTipText("Mouse-over any row for more information");

    candidatesTable.setDefaultRenderer(TableStringCell.class, new TableStringCellRenderer(
            candidatesTable.getForeground(),
            candidatesTable.getBackground(),
            candidatesTable.getSelectionForeground(),
            candidatesTable.getSelectionBackground()));

    candidatesTable.setDefaultEditor(TableStringCell.class, new TableStringCellEditor());
    candidatesTable.getSelectionModel().addListSelectionListener(this);

    /* button: "Add All" */
    allButton.addActionListener(new ActionListener()
    {
        @Override
        public void actionPerformed(ActionEvent e)
        {
            addAllCandidates();
        }
    });

    /* button: "Add Selected" */
    selectedButton.addActionListener(new ActionListener()
    {
        @Override
        public void actionPerformed(ActionEvent e)
        {
            addSelectedCandidates();
        }
    });

    /* button: "Cancel" */
    cancelButton.addActionListener(new ActionListener()
    {
        @Override
        public void actionPerformed(ActionEvent e)
        {
            cancelWithoutAddingCandidates();
        }
    });

    /* refresh buttons */
    refreshButtons();

    /* center form on the parent */
    setLocationRelativeTo(parent);

    /*****************************************
     * WORKER THREAD FOR RETRIEVING WEBPAGES *
     *****************************************/

    new Thread(new Runnable()
    {
        @Override
        public void run()
        {
            while (!cancelled)
            {
                /* read the volatile field once so that the same object is tested and run */
                final Downloader pending = downloader;

                if (pending != null)
                {
                    try
                    {
                        /* proceed with download */
                        pending.run();
                    }
                    catch (RuntimeException e)
                    {
                        /* keep this thread alive so that later webpages can still be retrieved */
                        if (parent.properties.getBoolean("debug"))
                        {
                            Debug.p("WebpageParser: downloader failed unexpectedly: " + e);
                        }
                    }
                    finally
                    {
                        /*
                         * Always signal that this download attempt is over;
                         * run() polls this field and would otherwise wait
                         * indefinitely after an unexpected failure.
                         */
                        downloader = null;
                    }
                }

                Debug.sleep(REFRESH_INTERVAL_MILLISECONDS);
            }
        }
    }).start();
}
/**
 * Shut this parser down: flag it as cancelled, deregister it from the
 * parent, then hide and dispose of the window.
 * This method must run on the EDT.
 */
private void closeParser()
{
    /* raise the flag first so that the background loops polling it can stop */
    this.cancelled = true;
    this.parent.removeParser(this);

    /* take the window off screen and release it */
    this.setVisible(false);
    this.dispose();
}
/**
 * Put the parser window into the iconified (minimized) state.
 * This method must run on the EDT.
 */
private void minimizeParser()
{
    final int iconifiedState = JFrame.ICONIFIED;
    this.setExtendedState(iconifiedState);
}
/**
 * Bring the parser window back to its normal state and move it to the front.
 * This method must run on the EDT.
 */
private void restoreParser()
{
    final int normalState = JFrame.NORMAL;
    this.setExtendedState(normalState);
    this.toFront();
}
/**
 * Bring the "Add All" and "Add Selected" buttons in line with the table:
 * each button shows its count and is enabled only when that count is
 * positive.
 * This method must run on the EDT.
 */
private void refreshButtons()
{
    /* "Add All" reflects the total number of rows in the table */
    final int total = candidatesTable.getRowCount();
    final boolean anyRows = (total > 0);
    allButton.setText(anyRows ? ("Add All (" + total + ")") : "Add All");
    allButton.setEnabled(anyRows);

    /* "Add Selected" reflects the number of currently selected rows */
    final int picked = candidatesTable.getSelectedRowCount();
    final boolean anyPicked = (picked > 0);
    selectedButton.setText(anyPicked ? ("Add Selected (" + picked + ")") : "Add Selected");
    selectedButton.setEnabled(anyPicked);
}
/**
 * Selection listener callback for the table of candidate media file
 * downloads; any selection change simply triggers a refresh of the buttons.
 *
 * @param e
 *     list selection event (its contents are not inspected)
 */
@Override
public void valueChanged(
        ListSelectionEvent e)
{
    this.refreshButtons();
}
/**
 * Register the specified download by adding it to the list of candidate media file downloads.
 * The download is appended to the list immediately (under the list's lock),
 * so callers can rely on it being present when this method returns. The
 * resulting table and button updates are Swing operations, so they are
 * performed on the EDT: directly if the caller is already on the EDT,
 * otherwise queued with {@link SwingUtilities#invokeLater(Runnable)}.
 * This matters because run(), which calls this method, is meant to execute
 * on a worker thread.
 *
 * @param d
 *     download to be registered
 */
private void registerCandidate(
        final Download d)
{
    final int insertedRow;

    synchronized (candidates)
    {
        candidates.add(d);
        insertedRow = candidates.size() - 1;
    }

    final Runnable notifyTable = new Runnable()
    {
        @Override
        public void run()
        {
            candidatesTableModel.fireTableRowsInserted(insertedRow, insertedRow);
            refreshButtons();
        }
    };

    if (SwingUtilities.isEventDispatchThread())
    {
        notifyTable.run();
    }
    else
    {
        SwingUtilities.invokeLater(notifyTable);
    }
}
/**
 * Hand every candidate media file download to the parent for actual
 * downloading, then close the parser.
 */
private void addAllCandidates()
{
    synchronized (candidates)
    {
        final int count = candidates.size();

        for (int i = 0; i < count; i++)
        {
            parent.addDownload(candidates.get(i));
        }

        closeParser();
    }
}
/**
 * Hand the candidate media file downloads that are selected in the table to
 * the parent for actual downloading, release the local filenames held by
 * the candidates that are not selected, then close the parser.
 */
private void addSelectedCandidates()
{
    synchronized (candidates)
    {
        /* one flag per model row; a freshly allocated boolean array is all false */
        final boolean[] chosen = new boolean[candidates.size()];

        /* the table may be sorted, so translate view rows to model rows */
        final int[] viewRows = candidatesTable.getSelectedRows();

        for (int k = 0; k < viewRows.length; k++)
        {
            final int modelRow = candidatesTable.convertRowIndexToModel(viewRows[k]);
            chosen[modelRow] = true;
        }

        int row = 0;

        while (row < chosen.length)
        {
            final Download candidate = candidates.get(row);

            if (!chosen[row])
            {
                parent.localFilenameManager.releaseFilename(candidate.getFile());
            }
            else
            {
                parent.addDownload(candidate);
            }

            row++;
        }

        closeParser();
    }
}
/**
 * Abandon the parser without adding any candidate media file downloads.
 * If parsing is still in progress, the user is first asked to confirm;
 * declining leaves the parser untouched. Otherwise the local filenames held
 * by all candidates are released and the parser is closed.
 */
private void cancelWithoutAddingCandidates()
{
    final boolean stillParsing = !(cancelled || completed);

    if (stillParsing)
    {
        /* confirm cancellation */
        final String question =
                parent.properties.getString("name") + " has not finished parsing this webpage. Cancel parsing now?";
        final String dialogTitle = "Confirm Cancel - " + getTitle();

        final int answer = JOptionPane.showConfirmDialog(
                this,
                question,
                dialogTitle,
                JOptionPane.YES_NO_OPTION,
                JOptionPane.WARNING_MESSAGE);

        if (answer != JOptionPane.YES_OPTION)
        {
            return;
        }
    }

    synchronized (candidates)
    {
        final int count = candidates.size();

        for (int i = 0; i < count; i++)
        {
            parent.localFilenameManager.releaseFilename(candidates.get(i).getFile());
        }
    }

    closeParser();
}
/**
 * Start parsing the webpage.
 * This method should run on a dedicated worker thread, not the EDT.
 *
 * Outline: wait for the parent's definitions to be loaded; then, starting
 * from the initial URL, repeatedly take a URL off a stack, have the
 * retrieval thread download it, and apply every definition whose
 * "url.match" pattern matches the URL. For each such definition, the
 * "download", "download1", ... patterns yield candidate media file
 * downloads and the "recurse", "recurse1", ... patterns yield further
 * webpage URLs to visit. Every link is handled at most once.
 *
 * When parsing ends without the parser having been cancelled, a failure to
 * retrieve the first webpage, or an empty list of candidates, is reported
 * to the user and the parser is closed by clicking the "Cancel" button on
 * the EDT.
 */
@Override
public void run()
{
    /* wait until the definitions have been loaded completely */
    while (parent.definitions == null)
    {
        if (cancelled)
        {
            return;
        }

        Debug.sleep(REFRESH_INTERVAL_MILLISECONDS);
    }

    /* key-CapturingGroups map for matched regex pattern strings */
    final Map<String,String[]> matchedPatterns = new HashMap<String,String[]>();

    /* stack of webpage URLs to be parsed */
    final Deque<URL> urls = new ArrayDeque<URL>();
    urls.push(url);

    /* links (webpages and downloads) that have already been handled */
    final Set<String> visitedLinks = new HashSet<String>();
    visitedLinks.add(url.toString());

    /* true until the first download or recurse match has been found */
    boolean firstUrl = true;
    boolean parsingFailed = false;

    final Deque<URL> recurseUrls = new ArrayDeque<URL>();
    final StringBuilder sb = new StringBuilder();

    try
    {
        NextUrl:
        while (!urls.isEmpty())
        {
            final URL u = urls.pop();
            final String urlString = u.toString();

            if (parent.properties.getBoolean("debug"))
            {
                Debug.p("WebpageParser: \"" + urlString + "\"");
            }

            /* download webpage content */
            sb.delete(0, sb.length());
            final Downloader dl = new Downloader(u, sb);

            /* hand the downloader over to the retrieval thread */
            downloader = dl;

            /* wait for download to finish (the retrieval thread resets the field to null) */
            while (true)
            {
                checkState();

                final Downloader d = downloader;

                if (d == null)
                {
                    break;
                }

                if (d.isProgressUpdated())
                {
                    SwingManipulator.updateProgressBar(progress, d.getProgressString(), d.getProgressPercent());
                }

                Debug.sleep(REFRESH_INTERVAL_MILLISECONDS);
            }

            try
            {
                dl.waitUntilCompleted();
            }
            catch (Exception e)
            {
                if (firstUrl)
                {
                    /* nothing has been matched so far: treat this as a failure of the whole parse */
                    parsingFailed = true;
                    throw e;
                }
                else
                {
                    continue NextUrl;
                }
            }

            NextDefinition:
            for (final Definition def : parent.definitions)
            {
                checkState();

                final Matcher urlMatcher;

                try
                {
                    urlMatcher = def.getPattern("url.match").matcher(urlString);
                }
                catch (Exception e)
                {
                    continue NextDefinition;
                }

                if (!urlMatcher.matches())
                {
                    continue NextDefinition;
                }

                matchedPatterns.put("url.match", Definition.capturingGroupsAsArray(urlMatcher));

                /* find candidate media file downloads */
                SwingManipulator.updateProgressBar(progress, "Finding candidate media file downloads", -1);

                NextDownloadMatcher:
                for (int k = 0; k < Integer.MAX_VALUE; k++)
                {
                    final String downloadKey = (k == 0) ? "download" : ("download" + k);
                    final Matcher downloadMatcher;

                    try
                    {
                        downloadMatcher = def.getPattern(downloadKey + ".match").matcher(sb);
                    }
                    catch (Exception e)
                    {
                        /* no pattern available for this key: stop looking for download patterns */
                        break NextDownloadMatcher;
                    }

                    NextDownload:
                    while (downloadMatcher.find())
                    {
                        checkState();

                        if (firstUrl)
                        {
                            showDefinitionNameInTitle(def);
                            firstUrl = false;
                        }

                        /* add candidate media file download */
                        matchedPatterns.put(downloadKey + ".match", Definition.capturingGroupsAsArray(downloadMatcher));

                        final String downloadUrlString;
                        String downloadTitle;
                        final String downloadFilename;
                        final String downloadFileext;

                        try
                        {
                            downloadUrlString = def.evaluateTargetString(downloadKey + ".url", matchedPatterns).trim();
                            downloadTitle = def.evaluateTargetString(downloadKey + ".title", matchedPatterns).trim();
                            downloadFilename = def.evaluateTargetString(downloadKey + ".filename", matchedPatterns).trim();
                            downloadFileext = def.evaluateTargetString(downloadKey + ".fileext", matchedPatterns).trim();
                        }
                        catch (Exception e)
                        {
                            /* targets cannot be evaluated: move on to the next download pattern */
                            continue NextDownloadMatcher;
                        }

                        if (visitedLinks.contains(downloadUrlString))
                        {
                            continue NextDownload;
                        }
                        else
                        {
                            visitedLinks.add(downloadUrlString);
                        }

                        final URL downloadUrl;

                        try
                        {
                            downloadUrl = new URL(downloadUrlString);
                        }
                        catch (Exception e)
                        {
                            continue NextDownload;
                        }

                        /* use a generic title if media title is empty */
                        if (downloadTitle.isEmpty())
                        {
                            downloadTitle = def.getName();
                        }

                        /* generate a "clean" (legal and unused) local filename */
                        final File downloadFile = parent.localFilenameManager.getCleanFilename(
                                downloadFilename,
                                downloadFileext,
                                parent.properties.getFile("download.directory"),
                                parent.properties.getBoolean("restricted.ascii.filenames"),
                                parent.properties.getBoolean("short.filenames"));

                        if (downloadFile == null)
                        {
                            /* failed to generate "clean" local filename */
                            SwingManipulator.showWarningDialog(
                                    this,
                                    getTitle(),
                                    "Failed to generate local filename for candidate media file at URL \"" +
                                    downloadUrlString + "\" using title \"" + downloadTitle +
                                    "\". This media file will be ignored.");
                        }
                        else
                        {
                            /* register candidate media file download */
                            registerCandidate(new Download(
                                    parent,
                                    downloadTitle,
                                    downloadUrl,
                                    downloadFile));
                        }
                    }
                }

                /* find recursive webpage URLs */
                SwingManipulator.updateProgressBar(progress, "Finding recursive webpage URLs", -1);

                NextRecurseMatcher:
                for (int k = 0; k < Integer.MAX_VALUE; k++)
                {
                    final String recurseKey = (k == 0) ? "recurse" : ("recurse" + k);
                    final Matcher recurseMatcher;

                    try
                    {
                        recurseMatcher = def.getPattern(recurseKey + ".match").matcher(sb);
                    }
                    catch (Exception e)
                    {
                        /* no pattern available for this key: stop looking for recurse patterns */
                        break NextRecurseMatcher;
                    }

                    NextRecurse:
                    while (recurseMatcher.find())
                    {
                        checkState();

                        if (firstUrl)
                        {
                            showDefinitionNameInTitle(def);
                            firstUrl = false;
                        }

                        /* add recursive webpage URL */
                        matchedPatterns.put(recurseKey + ".match", Definition.capturingGroupsAsArray(recurseMatcher));

                        final String recurseUrlString;

                        try
                        {
                            recurseUrlString = def.evaluateTargetString(recurseKey + ".url", matchedPatterns).trim();
                        }
                        catch (Exception e)
                        {
                            /* target cannot be evaluated: move on to the next recurse pattern */
                            continue NextRecurseMatcher;
                        }

                        try
                        {
                            if (!visitedLinks.contains(recurseUrlString))
                            {
                                visitedLinks.add(recurseUrlString);
                                recurseUrls.push(new URL(recurseUrlString));
                            }
                        }
                        catch (Exception e)
                        {
                            continue NextRecurse;
                        }
                    }
                }
            }

            /* add recursive URLs to the stack of webpages to be parsed */
            while (!recurseUrls.isEmpty())
            {
                urls.push(recurseUrls.pop());
            }
        }

        /* completed parsing webpage */
        SwingManipulator.updateProgressBar(progress, "Completed parsing webpage", 100);
    }
    catch (Exception e)
    {
        /* ignore: cancellation and retrieval failures are handled below via the state flags */
    }
    finally
    {
        completed = true;

        if (!cancelled)
        {
            if (parsingFailed)
            {
                SwingManipulator.showErrorDialog(
                        this,
                        getTitle(),
                        parent.properties.getString("name") + " could not access this webpage.");

                clickCancelButtonOnEdt();
            }
            else
            {
                /*
                 * Only take a snapshot while holding the lock. The EDT needs
                 * the same lock to query the table model, so the lock must
                 * not be held while a dialog is being shown.
                 */
                final boolean noCandidates;

                synchronized (candidates)
                {
                    noCandidates = candidates.isEmpty();
                }

                if (noCandidates)
                {
                    SwingManipulator.showWarningDialog(
                            this,
                            getTitle(),
                            parent.properties.getString("name") + " could not find any candidate media file downloads on this webpage.");

                    clickCancelButtonOnEdt();
                }
            }
        }
    }
}

/**
 * Show the name of the specified definition in the window title.
 * The title is updated asynchronously on the EDT.
 *
 * @param def
 *     definition whose name is to be shown
 */
private void showDefinitionNameInTitle(
        final Definition def)
{
    SwingUtilities.invokeLater(new Runnable()
    {
        @Override
        public void run()
        {
            setTitle(def.getName() + " - " + parent.getTitle());
        }
    });
}

/**
 * Click the "Cancel" button on the EDT, unless the parser has been
 * cancelled by the time the click is about to be performed.
 * Clicking the button ends up closing the parser, which must happen on the EDT.
 */
private void clickCancelButtonOnEdt()
{
    SwingUtilities.invokeLater(new Runnable()
    {
        @Override
        public void run()
        {
            if (!cancelled)
            {
                cancelButton.doClick();
            }
        }
    });
}
/**
 * Cancellation checkpoint for the parsing loop.
 * Returns normally while the parser is active, and throws once the parser
 * has been cancelled so that the caller unwinds out of its loops.
 *
 * @throws java.lang.Exception
 *     if the parser has been cancelled
 */
private void checkState()
        throws Exception
{
    final boolean stillActive = !cancelled;

    if (stillActive)
    {
        return;
    }

    throw new Exception();
}
/*****************
* INNER CLASSES *
*****************/
/**
 * Represent the model for the table of media file downloads.
 * Row i of the model is the i-th element of the enclosing parser's list of
 * candidates; column 0 shows the title and column 1 the (editable) local
 * filename.
 */
private class CandidatesTableModel
        extends AbstractTableModel
{
    /** column index: title */
    private static final int TITLE_COLUMN = 0;

    /** column index: local filename */
    private static final int FILENAME_COLUMN = 1;

    /** name of each column (headers) */
    private final String[] columnNames =
    {
        "Title",
        "<html>Local Filename <font color='blue'>(double-click cell to edit)</font></html>"
    };

    /** class of each column */
    private final Class<?>[] columnClasses =
    {
        TableStringCell.class,
        TableStringCell.class
    };

    /** is each column editable? */
    private final boolean[] columnEditable =
    {
        false,
        true
    };

    /** cached copy of the table cell contents, for rendering (one pair of cells per download) */
    private final Map<Download,Object[]> contents = new HashMap<Download,Object[]>();

    /**
     * @return current number of candidate media file downloads
     */
    @Override
    public int getRowCount()
    {
        synchronized (candidates)
        {
            return candidates.size();
        }
    }

    @Override
    public int getColumnCount()
    {
        return columnNames.length;
    }

    @Override
    public String getColumnName(
            int col)
    {
        return columnNames[col];
    }

    @Override
    public Class<?> getColumnClass(
            int col)
    {
        return columnClasses[col];
    }

    @Override
    public boolean isCellEditable(
            int row,
            int col)
    {
        return columnEditable[col];
    }

    /**
     * Return the cell object for the specified position.
     * The cell objects of a download are created on first use and cached;
     * the text of the requested cell is refreshed from the download on
     * every call.
     *
     * @param row
     *     model row index
     * @param col
     *     model column index
     * @return cell object for the position, or null if the column is unknown
     */
    @Override
    public Object getValueAt(
            int row,
            int col)
    {
        final Download d;

        synchronized (candidates)
        {
            d = candidates.get(row);
        }

        Object[] c = contents.get(d);

        if (c == null)
        {
            final TableStringCell titleCell = new TableStringCell(d);
            final TableStringCell filenameCell = new TableStringCell(d);

            /* initialize invariant properties of the cell contents */
            titleCell.align = JLabel.LEFT;
            filenameCell.align = JLabel.LEFT;

            c = new Object[] {titleCell, filenameCell};
            contents.put(d, c);
        }

        switch (col)
        {
            case TITLE_COLUMN:
                ((TableStringCell) c[TITLE_COLUMN]).text = "<html>" + d.getTitle() + "</html>";
                break;

            case FILENAME_COLUMN:
                ((TableStringCell) c[FILENAME_COLUMN]).text = d.getFilename();
                break;

            default:
                return null;
        }

        return c[col];
    }

    /**
     * Apply an edit to the specified cell.
     * Only the local filename column reacts to edits: the new filename is
     * passed to the download, and an error dialog is shown if the download
     * rejects it. Edits to any other column are ignored.
     *
     * @param val
     *     new value; cast to String for the local filename column
     * @param row
     *     model row index
     * @param col
     *     model column index
     */
    @Override
    public void setValueAt(
            Object val,
            int row,
            int col)
    {
        final Download d;

        synchronized (candidates)
        {
            d = candidates.get(row);
        }

        if (col != FILENAME_COLUMN)
        {
            return;
        }

        if (d.setFilename((String) val))
        {
            fireTableCellUpdated(row, col);
        }
        else
        {
            SwingManipulator.showErrorDialog(
                    WebpageParser.this,
                    WebpageParser.this.getTitle(),
                    "The specified filename is invalid.\nIt may contain illegal characters, or be already in use.");
        }
    }
}
/***************************
* NETBEANS-GENERATED CODE *
***************************/
/** This method is called from within the constructor to
* initialize the form.
* It creates the form's components, lays them out (webpage panel at the
* top, candidates panel in the center) and packs the window.
* WARNING: Do NOT modify this code. The content of this method is
* always regenerated by the Form Editor.
*/
@SuppressWarnings("unchecked")
// <editor-fold defaultstate="collapsed" desc="Generated Code">//GEN-BEGIN:initComponents
private void initComponents() {
/* create all components */
webpagePanel = new javax.swing.JPanel();
urlPane = new javax.swing.JScrollPane();
urlText = new javax.swing.JTextArea();
progress = new javax.swing.JProgressBar();
candidatesPanel = new javax.swing.JPanel();
candidatesPane = new javax.swing.JScrollPane();
candidatesTable = new javax.swing.JTable();
buttonsPanel = new javax.swing.JPanel();
allButton = new javax.swing.JButton();
selectedButton = new javax.swing.JButton();
cancelButton = new javax.swing.JButton();
/* closing the window is handled by a window listener added in the constructor */
setDefaultCloseOperation(javax.swing.WindowConstants.DO_NOTHING_ON_CLOSE);
/* top panel: read-only webpage URL text area with the progress bar below it */
webpagePanel.setBorder(javax.swing.BorderFactory.createTitledBorder("Webpage URL"));
webpagePanel.setLayout(new java.awt.BorderLayout());
urlPane.setHorizontalScrollBarPolicy(javax.swing.ScrollPaneConstants.HORIZONTAL_SCROLLBAR_NEVER);
urlText.setColumns(20);
urlText.setEditable(false);
urlText.setLineWrap(true);
urlText.setRows(3);
urlText.setToolTipText("Webpage URL being parsed");
urlPane.setViewportView(urlText);
webpagePanel.add(urlPane, java.awt.BorderLayout.CENTER);
progress.setStringPainted(true);
webpagePanel.add(progress, java.awt.BorderLayout.PAGE_END);
getContentPane().add(webpagePanel, java.awt.BorderLayout.PAGE_START);
/* center panel: sortable table of candidates with a row of buttons below it */
candidatesPanel.setBorder(javax.swing.BorderFactory.createTitledBorder("Candidate Media File Downloads"));
candidatesPanel.setLayout(new java.awt.BorderLayout());
candidatesPane.setPreferredSize(new java.awt.Dimension(600, 200));
candidatesTable.setAutoCreateRowSorter(true);
candidatesTable.setModel(candidatesTableModel);
candidatesPane.setViewportView(candidatesTable);
candidatesPanel.add(candidatesPane, java.awt.BorderLayout.CENTER);
/* buttons: "Add All", "Add Selected" and "Cancel", laid out in a single row */
buttonsPanel.setLayout(new java.awt.GridLayout(1, 3));
allButton.setIcon(new javax.swing.ImageIcon(getClass().getResource("/org/freeshell/zs/mediasniper/resources/add.png"))); // NOI18N
allButton.setMnemonic('a');
allButton.setText("Add All");
allButton.setToolTipText("Add all candidate media file downloads");
buttonsPanel.add(allButton);
selectedButton.setIcon(new javax.swing.ImageIcon(getClass().getResource("/org/freeshell/zs/mediasniper/resources/add.png"))); // NOI18N
selectedButton.setMnemonic('s');
selectedButton.setText("Add Selected");
selectedButton.setToolTipText("Add selected candidate media file downloads");
buttonsPanel.add(selectedButton);
cancelButton.setIcon(new javax.swing.ImageIcon(getClass().getResource("/org/freeshell/zs/mediasniper/resources/cross.png"))); // NOI18N
cancelButton.setMnemonic('c');
cancelButton.setText("Cancel");
cancelButton.setToolTipText("Cancel parsing of this webpage");
buttonsPanel.add(cancelButton);
candidatesPanel.add(buttonsPanel, java.awt.BorderLayout.PAGE_END);
getContentPane().add(candidatesPanel, java.awt.BorderLayout.CENTER);
/* size the window to fit the preferred sizes of its components */
pack();
}// </editor-fold>//GEN-END:initComponents
// Variables declaration - do not modify//GEN-BEGIN:variables
private javax.swing.JButton allButton;
private javax.swing.JPanel buttonsPanel;
private javax.swing.JButton cancelButton;
private javax.swing.JScrollPane candidatesPane;
private javax.swing.JPanel candidatesPanel;
private javax.swing.JTable candidatesTable;
private javax.swing.JProgressBar progress;
private javax.swing.JButton selectedButton;
private javax.swing.JScrollPane urlPane;
private javax.swing.JTextArea urlText;
private javax.swing.JPanel webpagePanel;
// End of variables declaration//GEN-END:variables
}