<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	xmlns:georss="http://www.georss.org/georss" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:media="http://search.yahoo.com/mrss/"
	>

<channel>
	<title>Please Scoop Me!</title>
	<atom:link href="http://pleasescoopme.com/feed/" rel="self" type="application/rss+xml" />
	<link>http://pleasescoopme.com</link>
	<description>Jonathan&#039;s Research Blog</description>
	<lastBuildDate>Sat, 08 May 2010 18:09:48 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.com/</generator>
<cloud domain='pleasescoopme.com' port='80' path='/?rsscloud=notify' registerProcedure='' protocol='http-post' />
<image>
		<url>http://1.gravatar.com/blavatar/9b6c8d810609417e89ab6cb1082e3677?s=96&#038;d=http://s2.wp.com/i/buttonw-com.png</url>
		<title>Please Scoop Me!</title>
		<link>http://pleasescoopme.com</link>
	</image>
	<atom:link rel="search" type="application/opensearchdescription+xml" href="http://pleasescoopme.com/osd.xml" title="Please Scoop Me!" />
	<atom:link rel='hub' href='http://pleasescoopme.com/?pushpress=hub'/>
		<item>
		<title>A new home for Facebook data team publications</title>
		<link>http://pleasescoopme.com/2010/05/08/a-new-home-for-facebook-data-team-publications/</link>
		<comments>http://pleasescoopme.com/2010/05/08/a-new-home-for-facebook-data-team-publications/#comments</comments>
		<pubDate>Sat, 08 May 2010 18:09:48 +0000</pubDate>
		<dc:creator>slycoder</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://pleasescoopme.com/?p=569</guid>
		<description><![CDATA[Brendan (who will be joining us this summer, natch!) pointed out yesterday that we don&#8217;t have a repository for the papers we&#8217;ve published here at Facebook. We moved fast. Now you can find them by clicking on the &#8220;Papers&#8221; tab of the Facebook Data page. Happy reading everyone!<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=569&subd=slycoder&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<p>Brendan (who will be joining us this summer, natch!) pointed out yesterday that we don&#8217;t have a repository for the papers we&#8217;ve published here at Facebook.   We moved fast.  Now you can find them by clicking on the <a href="http://www.facebook.com/data?v=app_4949752878">&#8220;Papers&#8221; tab of the Facebook Data page</a>.  Happy reading everyone!</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/slycoder.wordpress.com/569/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/slycoder.wordpress.com/569/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/slycoder.wordpress.com/569/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/slycoder.wordpress.com/569/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/slycoder.wordpress.com/569/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/slycoder.wordpress.com/569/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/slycoder.wordpress.com/569/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/slycoder.wordpress.com/569/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/slycoder.wordpress.com/569/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/slycoder.wordpress.com/569/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/slycoder.wordpress.com/569/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/slycoder.wordpress.com/569/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/slycoder.wordpress.com/569/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/slycoder.wordpress.com/569/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=569&subd=slycoder&ref=&feed=1" />]]></content:encoded>
			<wfw:commentRss>http://pleasescoopme.com/2010/05/08/a-new-home-for-facebook-data-team-publications/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/472fd1b0a3858a5ccfb0cc27411079e9?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">slycoder</media:title>
		</media:content>
	</item>
		<item>
		<title>Oh god, now there&#8217;s another video of me online</title>
		<link>http://pleasescoopme.com/2010/05/07/oh-god-now-theres-another-video-of-me-online/</link>
		<comments>http://pleasescoopme.com/2010/05/07/oh-god-now-theres-another-video-of-me-online/#comments</comments>
		<pubDate>Fri, 07 May 2010 23:01:06 +0000</pubDate>
		<dc:creator>slycoder</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://pleasescoopme.com/?p=565</guid>
		<description><![CDATA[Recently I got to participate on a panel / give a talk as a part of the NAE Seattle Grand Challenges Summit. Let me thank Ed Lazowska for putting together such a great panel &#8212; Alon Halevy, Larry Smarr and Catharine van Ingen. I think I got a contact high just from being around such [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=565&subd=slycoder&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<p>Recently I got to participate on a panel / give a talk as a part of the <a href="http://www.engr.washington.edu/news/nae10/index.html">NAE Seattle Grand Challenges Summit</a>.  Let me thank Ed Lazowska for putting together such a great panel &#8212; Alon Halevy, Larry Smarr and Catharine van Ingen.  I think I got a contact high just from being around such awesome researchers.</p>
<p>Anyhow, a video has surfaced of my talk.  I would recommend against watching it, unless you want to see me nebbish my way through a five minute talk.</p>
<p><span style="text-align:center; display: block;"><a href="http://pleasescoopme.com/2010/05/07/oh-god-now-theres-another-video-of-me-online/"><img src="http://img.youtube.com/vi/Li8zjTJ0ItY/2.jpg" alt="" /></a></span></p>
<p>There&#8217;s also some more coverage <a href="http://seattletimes.nwsource.com/html/technologybrierdudleysblog/2011771001_looming_data_tsunami_coming_uw.html">here</a>.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/slycoder.wordpress.com/565/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/slycoder.wordpress.com/565/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/slycoder.wordpress.com/565/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/slycoder.wordpress.com/565/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/slycoder.wordpress.com/565/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/slycoder.wordpress.com/565/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/slycoder.wordpress.com/565/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/slycoder.wordpress.com/565/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/slycoder.wordpress.com/565/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/slycoder.wordpress.com/565/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/slycoder.wordpress.com/565/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/slycoder.wordpress.com/565/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/slycoder.wordpress.com/565/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/slycoder.wordpress.com/565/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=565&subd=slycoder&ref=&feed=1" />]]></content:encoded>
			<wfw:commentRss>http://pleasescoopme.com/2010/05/07/oh-god-now-theres-another-video-of-me-online/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/472fd1b0a3858a5ccfb0cc27411079e9?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">slycoder</media:title>
		</media:content>

		<media:content url="http://img.youtube.com/vi/Li8zjTJ0ItY/2.jpg" medium="image" />
	</item>
		<item>
		<title>Slides from some recent talks</title>
		<link>http://pleasescoopme.com/2010/04/28/slides-from-some-recent-talks/</link>
		<comments>http://pleasescoopme.com/2010/04/28/slides-from-some-recent-talks/#comments</comments>
		<pubDate>Wed, 28 Apr 2010 00:19:04 +0000</pubDate>
		<dc:creator>slycoder</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://pleasescoopme.com/?p=555</guid>
		<description><![CDATA[Recently I had the honor of being invited to give a couple of talks in the Boston area. One at NESCAI and one at NESS. I had a great time and the feedback from the audiences was awesome. A shout out to Jeff of search engine cafe is in order. I also want to especially [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=555&subd=slycoder&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<p>Recently I had the honor of being invited to give a couple of talks in the Boston area.  One at <a href="http://nescai.cs.umass.edu/index.php">NESCAI</a> and one at <a href="http://www.stat.harvard.edu/NESS10/index.htm">NESS</a>.  I had a great time and the feedback from the audiences was awesome.  A shout out to Jeff of <a href="http://www.searchenginecaffe.com/">search engine cafe</a> is in order.  I also want to especially thank David/Sameer and Edo for inviting me and for putting together such great programs!</p>
<p>I have uploaded the slides for these talks <a href='http://slycoder.files.wordpress.com/2010/04/ness-distributed1.pdf'>here</a>.</p>
<p>I&#8217;m also going to be on a panel for <a href="http://www.engr.washington.edu/news/nae10/index.html">NAE&#8217;s Grand Challenges Summit</a> next Monday.  If you&#8217;re going to be in the Seattle area, stop by and say hi!</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/slycoder.wordpress.com/555/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/slycoder.wordpress.com/555/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/slycoder.wordpress.com/555/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/slycoder.wordpress.com/555/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/slycoder.wordpress.com/555/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/slycoder.wordpress.com/555/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/slycoder.wordpress.com/555/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/slycoder.wordpress.com/555/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/slycoder.wordpress.com/555/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/slycoder.wordpress.com/555/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/slycoder.wordpress.com/555/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/slycoder.wordpress.com/555/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/slycoder.wordpress.com/555/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/slycoder.wordpress.com/555/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=555&subd=slycoder&ref=&feed=1" />]]></content:encoded>
			<wfw:commentRss>http://pleasescoopme.com/2010/04/28/slides-from-some-recent-talks/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/472fd1b0a3858a5ccfb0cc27411079e9?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">slycoder</media:title>
		</media:content>
	</item>
		<item>
		<title>Using jjplot to explore tipping behavior</title>
		<link>http://pleasescoopme.com/2010/03/31/using-jjplot-to-explore-tipping-behavior/</link>
		<comments>http://pleasescoopme.com/2010/03/31/using-jjplot-to-explore-tipping-behavior/#comments</comments>
		<pubDate>Wed, 31 Mar 2010 20:56:54 +0000</pubDate>
		<dc:creator>slycoder</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://pleasescoopme.com/?p=544</guid>
		<description><![CDATA[In this post, I&#8217;ll show off some recent changes to jjplot that we think are really cool. To help motivate these changes, I&#8217;ll walk through them using the tips dataset included with the reshape package. Improved faceting along multiple dimensions. This shows a scatter plot of how much males and females tip on each day [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=544&subd=slycoder&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<p>In this post, I&#8217;ll show off some recent changes to jjplot that we think are really cool.  To help motivate these changes, I&#8217;ll walk through them using the tips dataset included with the reshape package.  </p>
<ul>
<li> Improved faceting along multiple dimensions.  This shows a scatter plot of how much males and females tip on each day of the week, along with best-fit lines.  The black, dashed line shows the best-fit across all data points.  Points/lines are otherwise colored by day.  I&#8217;ll leave it to you to guess why the slope is higher for men on Saturday, but lower on Sunday.<br />
<code><br />
jjplot(tip ~ (abline() : group(fit(), by = day: sex) +<br />
              point(alpha = 0.5)) : color(day) +<br />
       abline(lty = "dashed") : fit() + total_bill,<br />
       data = tips,<br />
       facet.y = day, facet.x = sex)<br />
</code><br />
<a href="http://slycoder.files.wordpress.com/2010/03/tip_scatter.png"><img src="http://slycoder.files.wordpress.com/2010/03/tip_scatter.png?w=500&#038;h=619" alt="" title="tip_scatter" width="500" height="619" class="aligncenter size-full wp-image-547" /></a></p>
<li> New stats/geoms such as area/density.   Here we&#8217;ll make a density plot of the tip fraction, that is, the tip amount over the total bill.   The black density shows the overall density, while each overlaid density shows the density just for points in that panel.<br />
<code><br />
jjplot(~ area() : group(density(), by = day:sex) : color(day, alpha = 0.5) +<br />
       area() : group(density(), by = day) +<br />
       I(tip / total_bill),<br />
       data = tips,<br />
       facet.y = day, facet.x = sex,<br />
       xlab = "tip fraction",<br />
       ylab = "")<br />
</code><br />
<a href="http://slycoder.files.wordpress.com/2010/03/tip_density.png"><img src="http://slycoder.files.wordpress.com/2010/03/tip_density.png?w=500&#038;h=619" alt="" title="tip_density" width="500" height="619" class="aligncenter size-full wp-image-546" /></a></p>
<li> Custom geoms/stats.  We want to make it easier for the community to augment the system.  Right now, the syntax is still sort of opaque and we&#8217;re working on it, but you can already get a custom stat just by naming your function jjplot.stat.*.  For example, below we define a new kmeans stat.  We then cluster the points and draw a best-fit line for each cluster.<br />
<code><br />
jjplot.stat.kmeans &lt;- function(state, K, use.y = FALSE) {<br />
  if (use.y) {<br />
    km &lt;- kmeans(cbind(state$data$x, state$data$y), K)<br />
  } else {<br />
    km &lt;- kmeans(state$data$x, K)<br />
  }<br />
  state$data$cluster &lt;- factor(km$cluster)<br />
  state<br />
}<br />
jjplot(tip ~ point() +<br />
       abline() : group(fit(), cluster) : kmeans(3) +<br />
       total_bill,<br />
       data = tips)<br />
</code><br />
<a href="http://slycoder.files.wordpress.com/2010/03/tip_kmeans.png"><img src="http://slycoder.files.wordpress.com/2010/03/tip_kmeans.png?w=500&#038;h=394" alt="" title="tip_kmeans" width="500" height="394" class="aligncenter size-full wp-image-551" /></a></p>
<li> Coloring on derived statistics.  You may have noticed in the earlier examples that the color syntax has changed.  We figured color should be kind of like sort &#8212; it&#8217;s a pseudo-statistic which can be inserted anywhere in a statistics stack.  This means that it becomes easy to color based off of derived statistics.  In this example, we make the previous plot much more useful by coloring the fits and points according to the assigned cluster.<br />
<code><br />
jjplot(tip ~ (point() +<br />
       abline() : group(fit(), cluster)) : color(cluster) : kmeans(3) +<br />
       total_bill,<br />
       data = tips)<br />
</code><br />
<a href="http://slycoder.files.wordpress.com/2010/03/tips_kmeans_color.png"><img src="http://slycoder.files.wordpress.com/2010/03/tips_kmeans_color.png?w=500&#038;h=394" alt="" title="tips_kmeans_color" width="500" height="394" class="aligncenter size-full wp-image-552" /></a>
</ul>
<p>Let us know what you think!  P.S. A release on CRAN is coming very soon&#8230;</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/slycoder.wordpress.com/544/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/slycoder.wordpress.com/544/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/slycoder.wordpress.com/544/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/slycoder.wordpress.com/544/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/slycoder.wordpress.com/544/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/slycoder.wordpress.com/544/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/slycoder.wordpress.com/544/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/slycoder.wordpress.com/544/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/slycoder.wordpress.com/544/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/slycoder.wordpress.com/544/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/slycoder.wordpress.com/544/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/slycoder.wordpress.com/544/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/slycoder.wordpress.com/544/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/slycoder.wordpress.com/544/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=544&subd=slycoder&ref=&feed=1" />]]></content:encoded>
			<wfw:commentRss>http://pleasescoopme.com/2010/03/31/using-jjplot-to-explore-tipping-behavior/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/472fd1b0a3858a5ccfb0cc27411079e9?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">slycoder</media:title>
		</media:content>

		<media:content url="http://slycoder.files.wordpress.com/2010/03/tip_scatter.png" medium="image">
			<media:title type="html">tip_scatter</media:title>
		</media:content>

		<media:content url="http://slycoder.files.wordpress.com/2010/03/tip_density.png" medium="image">
			<media:title type="html">tip_density</media:title>
		</media:content>

		<media:content url="http://slycoder.files.wordpress.com/2010/03/tip_kmeans.png" medium="image">
			<media:title type="html">tip_kmeans</media:title>
		</media:content>

		<media:content url="http://slycoder.files.wordpress.com/2010/03/tips_kmeans_color.png" medium="image">
			<media:title type="html">tips_kmeans_color</media:title>
		</media:content>
	</item>
		<item>
		<title>ePluribus: Ethnicity on Social Networks</title>
		<link>http://pleasescoopme.com/2010/03/22/epluribus-ethnicity-on-social-networks/</link>
		<comments>http://pleasescoopme.com/2010/03/22/epluribus-ethnicity-on-social-networks/#comments</comments>
		<pubDate>Mon, 22 Mar 2010 02:06:26 +0000</pubDate>
		<dc:creator>slycoder</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://pleasescoopme.com/?p=542</guid>
		<description><![CDATA[is the name of the paper I wrote with Lars, Itamar, and Cameron. It will appear at this year&#8217;s ICWSM. You may commence bating those breaths.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=542&subd=slycoder&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<p>is the name of the paper I wrote with Lars, Itamar, and Cameron.   It will appear at this year&#8217;s <a href="http://icwsm.org/2010/papers.shtml">ICWSM</a>.  You may commence bating those breaths.  </p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/slycoder.wordpress.com/542/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/slycoder.wordpress.com/542/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/slycoder.wordpress.com/542/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/slycoder.wordpress.com/542/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/slycoder.wordpress.com/542/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/slycoder.wordpress.com/542/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/slycoder.wordpress.com/542/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/slycoder.wordpress.com/542/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/slycoder.wordpress.com/542/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/slycoder.wordpress.com/542/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/slycoder.wordpress.com/542/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/slycoder.wordpress.com/542/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/slycoder.wordpress.com/542/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/slycoder.wordpress.com/542/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=542&subd=slycoder&ref=&feed=1" />]]></content:encoded>
			<wfw:commentRss>http://pleasescoopme.com/2010/03/22/epluribus-ethnicity-on-social-networks/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/472fd1b0a3858a5ccfb0cc27411079e9?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">slycoder</media:title>
		</media:content>
	</item>
		<item>
		<title>A few jjplot updates</title>
		<link>http://pleasescoopme.com/2010/03/13/a-few-jjplot-updates/</link>
		<comments>http://pleasescoopme.com/2010/03/13/a-few-jjplot-updates/#comments</comments>
		<pubDate>Sat, 13 Mar 2010 22:02:12 +0000</pubDate>
		<dc:creator>slycoder</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://pleasescoopme.com/?p=519</guid>
		<description><![CDATA[Eytan and I have been actively exploring lots of crazy new ideas in jjplot, a new plotting library for R. Here&#8217;s a quick rundown of recent changes. We&#8217;d love to hear what you guys think Formulae. The old way of expressing the series of geoms and stats that form the plot was cumbersome. Putting a [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=519&subd=slycoder&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<p>Eytan and I have been actively exploring lots of crazy new ideas in <a href="http://code.google.com/p/jjplot/">jjplot</a>, a new plotting library for R.  Here&#8217;s a quick rundown of recent changes.  We&#8217;d love to hear what you guys think.</p>
<ol>
<li> <b>Formulae</b>.  The old way of expressing the series of geoms and stats that form the plot was cumbersome.   Putting a series of commands in the &#8230; leads to annoying issues such as poorer error handling.  More importantly, because it can only express a series of statements, it becomes unclear which stats affect which geoms, making it impossible to express more complicated combinations.
<p>We believe that formulae are a good solution to this.  Layers are separated by &#8216;+&#8217; operations.   Interactions between stats and geoms are expressed via the interaction operator &#8216;:&#8217;.  This allows us to gracefully express arbitrary trees of stats and geoms.  An example of a jittered scatter plot:</p>
<p>Old:<br />
<code><br />
jjplot(x = x, y = y, data = data,<br />
         jjplot.jitter(xfactor=1),<br />
         jjplot.point())<br />
</code><br />
New:<br />
<code><br />
jjplot(y ~ point() : jitter(xfactor = 1) + x, data = data)<br />
</code><br />
The leftmost and rightmost terms correspond to the y and x aesthetics.  For a simple case such as this, formulae might not seem like much of an improvement.  But consider a more complex example:<br />
<code><br />
jjplot( ~ line(lty="dashed", col = "red") : hist() +<br />
              bar(width = 0.1) : hist() : jitter(xfactor = 1) +<br />
              Sepal.Length, data = iris)<br />
</code><br />
<a href="http://slycoder.files.wordpress.com/2010/03/stacked_stats.png"><img src="http://slycoder.files.wordpress.com/2010/03/stacked_stats.png?w=500&#038;h=375" alt="" title="stacked_stats" width="500" height="375" class="aligncenter size-full wp-image-522" /></a><br />
Reading from the right, this says to take iris$Sepal.Length, jitter it, bin the data, and bar plot the result.  This is cool because it&#8217;s immediately clear that you&#8217;re stacking stats, plotting a histogram of the <i>jittered</i> data.  The first term does the same thing, except that it does a hist() statistic <i>without</i> the jitter, and draws this as a red line.   </p>
<p>By using parentheses, you can also apply a stat to multiple stats/geoms.<br />
<code><br />
jjplot( ~ (point(col = "blue", size=3) +<br />
                       line(col = "red", lty="dashed") +<br />
                       bar(width=0.25)) : hist() +<br />
       Petal.Length, data = iris)<br />
</code><br />
Here we&#8217;re just plotting a histogram but with some extra geoms on top for some extra flair.<br />
<a href="http://slycoder.files.wordpress.com/2010/03/stacked_geoms.png"><img src="http://slycoder.files.wordpress.com/2010/03/stacked_geoms.png?w=500&#038;h=375" alt="" title="stacked_geoms" width="500" height="375" class="aligncenter size-full wp-image-537" /></a><br />
We think this notation is a simple and elegant way of expressing what interacts with what.</p>
<li> <b>Facets</b> This way of thinking about facets is somewhat controversial among us.  Normally, facets conflate two concepts: how you compute statistics and how you plot them.   This means that you compute statistics on facet subsets, then you plot each subset in a separate panel.   Well, currently jjplot takes a different tack, treating facets as merely a command to plot different subsets of the data in different panels.  To see what this implies, consider<br />
<code><br />
df &lt;- data.frame(state = rownames(state.x77),<br />
                 region = state.region,<br />
                 state.x77)<br />
jjplot(Murder ~ abline(lty = &quot;dashed&quot;) : fit() +<br />
       abline() : group(fit(), by = region) +<br />
       point() + Income,<br />
       data = df, color = region, facet = region)<br />
</code><br />
<a href="http://slycoder.files.wordpress.com/2010/03/faceted_stats.png"><img src="http://slycoder.files.wordpress.com/2010/03/faceted_stats.png?w=500&#038;h=375" alt="" title="faceted_stats" width="500" height="375" class="aligncenter size-full wp-image-527" /></a><br />
The first two terms simply do a scatter plot.  The next line does lm fits on each subset.  Note that you have to be explicit with the grouping.  With old semantics, you&#8217;d have an implicit group by on the facet variable, but because we aren&#8217;t combining the grouping and the faceting anymore, you have to spell it out.  The first line shows you the effect of leaving out the grouping operator: you get a fit over all the data that appears on all panels.  This is something I&#8217;ve always wanted to do and it also seems to be a persistent question on Stack Overflow (e.g., &#8220;how do I draw a line at the facet/global mean on each facet panel?&#8221;).  Hopefully this formulation makes it obvious.</p>
<li> <b>Sorting</b> Another persistent question is how to perform sorting on factor scales.  Because of the ease of stacking stats in the formula formulation, we think it makes sense to add a few special stats/geoms.  One of them is the sort stat.  This performs an identity operation on the data frame but also appends some metadata about how to order things which is then intercepted when the scales are created.  Here are some usage examples:<br />
<code><br />
df &lt;- data.frame(name = factor(letters),<br />
                 value = rnorm(26 * 6),<br />
                 type = rep(factor(month.name[1:6]), each = 26))<br />
jjplot(name ~ point() + value,<br />
       data = df, color = type, facet = type)<br />
</code><br />
The first plot is the data unsorted.<br />
<a href="http://slycoder.files.wordpress.com/2010/03/sorted_stats001.png"><img src="http://slycoder.files.wordpress.com/2010/03/sorted_stats001.png?w=480&#038;h=480" alt="" title="sorted_stats001" width="480" height="480" class="aligncenter size-full wp-image-532" /></a><br />
<code><br />
jjplot(name ~ point() : sort(y = value) + value,<br />
       data = df, color = type, facet = type)<br />
</code><br />
The second plot sorts according to the mean value associated with each factor across all facets (remember no grouping!).  Like relevel, the sort statistic can take a function argument to specify how multiple points should be sorted.<br />
<a href="http://slycoder.files.wordpress.com/2010/03/sorted_stats002.png"><img src="http://slycoder.files.wordpress.com/2010/03/sorted_stats002.png?w=480&#038;h=480" alt="" title="sorted_stats002" width="480" height="480" class="aligncenter size-full wp-image-533" /></a><br />
<code><br />
jjplot(name ~ point() : group(sort(y = value), by=type) + value,<br />
       data = df, color = type, facet = type)<br />
</code><br />
The last plot wraps the sort in a group by, meaning that each facet panel has its own sorting order.<br />
<a href="http://slycoder.files.wordpress.com/2010/03/sorted_stats003.png"><img src="http://slycoder.files.wordpress.com/2010/03/sorted_stats003.png?w=480&#038;h=480" alt="" title="sorted_stats003" width="480" height="480" class="aligncenter size-full wp-image-534" /></a>
</ol>
<p>All of this awesomeness is available in the current svn repo.  Check it out!</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/slycoder.wordpress.com/519/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/slycoder.wordpress.com/519/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/slycoder.wordpress.com/519/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/slycoder.wordpress.com/519/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/slycoder.wordpress.com/519/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/slycoder.wordpress.com/519/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/slycoder.wordpress.com/519/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/slycoder.wordpress.com/519/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/slycoder.wordpress.com/519/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/slycoder.wordpress.com/519/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/slycoder.wordpress.com/519/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/slycoder.wordpress.com/519/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/slycoder.wordpress.com/519/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/slycoder.wordpress.com/519/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=519&subd=slycoder&ref=&feed=1" />]]></content:encoded>
			<wfw:commentRss>http://pleasescoopme.com/2010/03/13/a-few-jjplot-updates/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/472fd1b0a3858a5ccfb0cc27411079e9?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">slycoder</media:title>
		</media:content>

		<media:content url="http://slycoder.files.wordpress.com/2010/03/stacked_stats.png" medium="image">
			<media:title type="html">stacked_stats</media:title>
		</media:content>

		<media:content url="http://slycoder.files.wordpress.com/2010/03/stacked_geoms.png" medium="image">
			<media:title type="html">stacked_geoms</media:title>
		</media:content>

		<media:content url="http://slycoder.files.wordpress.com/2010/03/faceted_stats.png" medium="image">
			<media:title type="html">faceted_stats</media:title>
		</media:content>

		<media:content url="http://slycoder.files.wordpress.com/2010/03/sorted_stats001.png" medium="image">
			<media:title type="html">sorted_stats001</media:title>
		</media:content>

		<media:content url="http://slycoder.files.wordpress.com/2010/03/sorted_stats002.png" medium="image">
			<media:title type="html">sorted_stats002</media:title>
		</media:content>

		<media:content url="http://slycoder.files.wordpress.com/2010/03/sorted_stats003.png" medium="image">
			<media:title type="html">sorted_stats003</media:title>
		</media:content>
	</item>
		<item>
		<title>R LDA package updated to version 1.2 and an ideal-point model for political blogs</title>
		<link>http://pleasescoopme.com/2010/03/08/r-lda-package-updated-to-version-1-2-and-an-ideal-point-model-for-political-blogs/</link>
		<comments>http://pleasescoopme.com/2010/03/08/r-lda-package-updated-to-version-1-2-and-an-ideal-point-model-for-political-blogs/#comments</comments>
		<pubDate>Mon, 08 Mar 2010 17:45:55 +0000</pubDate>
		<dc:creator>slycoder</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://pleasescoopme.com/?p=488</guid>
		<description><![CDATA[I&#8217;ve been on a bit of an R tear lately. Today you should see a new version of the R lda package. This version has lots of fixes including a working mmsb demo with the latest version of ggplot2, corrected RTM code, improved likelihood reporting, better documentation, and much more. Grab it from CRAN today! [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=488&subd=slycoder&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<p>I&#8217;ve been on a bit of an R tear lately.  Today you should see a new version of the R lda package.  This version has lots of fixes including a working mmsb demo with the latest version of ggplot2, corrected RTM code, improved likelihood reporting, better documentation, and much more.  Grab it from <a href="http://cran.r-project.org/web/packages/lda/">CRAN</a> today!  Special thanks to the following people for bug reports/feature requests (sorry if I forgot anyone):</p>
<ul>
<li>Edo Airoldi
<li>Jordan Boyd-Graber
<li>Khalid El-Arini
<li>Roger Levy
<li>Solomon Messing
<li>Joerg Reichardt
</ul>
<p>One of the new features is a method to make sLDA predictions on response variables conditioned on documents.  In the demo accompanying the package, I fit an sLDA model to a corpus of political blogs tagged as being either liberal or conservative.  With this fitted model, I can now use the new predict method to predict the political bent of each of the blogs within a continuous space.  The density plot of these predictions is given below, broken down by the original conservative/liberal label (color of shading).</p>
<p><a href="http://slycoder.files.wordpress.com/2010/02/slda-predict.png"><img src="http://slycoder.files.wordpress.com/2010/02/slda-predict.png?w=500&#038;h=500" alt="" title="An ideal point model for political blogs" width="500" height="500" class="aligncenter size-full wp-image-489" /></a></p>
<p>I like how there&#8217;s some bimodality for each contingency &#8212; a moderate group and a more extreme group.  The model also predicts a heavy tail of super-conservative blogs.  There is a real notable bump down by -3.   I dunno if this represents reality; it&#8217;s probably worthwhile to do more extensive model checking.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/slycoder.wordpress.com/488/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/slycoder.wordpress.com/488/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/slycoder.wordpress.com/488/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/slycoder.wordpress.com/488/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/slycoder.wordpress.com/488/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/slycoder.wordpress.com/488/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/slycoder.wordpress.com/488/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/slycoder.wordpress.com/488/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/slycoder.wordpress.com/488/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/slycoder.wordpress.com/488/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/slycoder.wordpress.com/488/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/slycoder.wordpress.com/488/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/slycoder.wordpress.com/488/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/slycoder.wordpress.com/488/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=488&subd=slycoder&ref=&feed=1" />]]></content:encoded>
			<wfw:commentRss>http://pleasescoopme.com/2010/03/08/r-lda-package-updated-to-version-1-2-and-an-ideal-point-model-for-political-blogs/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/472fd1b0a3858a5ccfb0cc27411079e9?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">slycoder</media:title>
		</media:content>

		<media:content url="http://slycoder.files.wordpress.com/2010/02/slda-predict.png" medium="image">
			<media:title type="html">An ideal point model for political blogs</media:title>
		</media:content>
	</item>
		<item>
		<title>jjplot: Yet another plotting library for R</title>
		<link>http://pleasescoopme.com/2010/03/07/jjplot-yet-another-plotting-library-for-r/</link>
		<comments>http://pleasescoopme.com/2010/03/07/jjplot-yet-another-plotting-library-for-r/#comments</comments>
		<pubDate>Sun, 07 Mar 2010 09:30:25 +0000</pubDate>
		<dc:creator>slycoder</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://pleasescoopme.com/?p=496</guid>
		<description><![CDATA[Those of you who follow this blog know that making (somewhat) pretty plots is an abiding interest of mine. Many of the plots I&#8217;ve made in the past were done using the great ggplot2 package. But recently Eytan Bakshy and I have been tinkering with our own plotting library, jjplot, as a playground for various [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=496&subd=slycoder&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<p>Those of you who follow this blog know that making (somewhat) pretty plots is an abiding interest of mine.  Many of the plots I&#8217;ve made in the past were done using the great ggplot2 package.  But recently <a href="http://www-personal.umich.edu/~ebakshy/ebakshy/Home.html">Eytan Bakshy</a> and I have been tinkering with our own plotting library, jjplot, as a playground for various ideas we&#8217;ve had.   As the name indicates, it is heavily inspired by hadley&#8217;s library.  Our library doesn&#8217;t do quite as much as ggplot2, and ours is liable to be much buggier.  But it&#8217;s still fun to play with.  Here are some examples of what jjplot can do:</p>
<ul>
<li> Bar plots with fills controlled by the values.<br />
<code><br />
df &lt;- data.frame(x = 1:50, y = rnorm(50))<br />
jjplot(x, y, data = df, fill = y, jjplot.bar(col = &quot;black&quot;))<br />
</code><br />
<a href="http://slycoder.files.wordpress.com/2010/03/jjplot_test001.png"><img src="http://slycoder.files.wordpress.com/2010/03/jjplot_test001.png?w=480&#038;h=480" alt="" title="jjplot_test001" width="480" height="480" class="aligncenter size-full wp-image-497" /></a>
<li> Boxplots.<br />
<code><br />
df &lt;- data.frame(state = rownames(state.x77), region = state.region, state.x77)<br />
jjplot(region, Income, data = df, fill = region, jjplot.group(jjplot.quantile(), by = region), jjplot.box())<br />
</code><br />
<a href="http://slycoder.files.wordpress.com/2010/03/jjplot_test003.png"><img src="http://slycoder.files.wordpress.com/2010/03/jjplot_test003.png?w=480&#038;h=480" alt="" title="jjplot_test003" width="480" height="480" class="aligncenter size-full wp-image-498" /></a>
<li> Scatter plot, colored by factor, with alpha blending.  This also demonstrates how statistics can be used to visualize different aspects of the data simultaneously.<br />
<code><br />
df &lt;- data.frame(x = rnorm(10000) + (1:4) * 1, f = factor(c(&#39;A&#39;, &#39;B&#39;, &#39;C&#39;, &#39;D&#39;)))<br />
df$y &lt;- c(-6, -2, 2, 4) * df$x + rnorm(10000)<br />
jjplot(x + 2, y, data = df, alpha = 0.10, color = f, jjplot.point(), jjplot.group(jjplot.fit(), by = f), jjplot.abline(), jjplot.fun.y(mean), jjplot.hline(lty = &quot;dashed&quot;))<br />
</code><br />
<a href="http://slycoder.files.wordpress.com/2010/03/jjplot_test008.png"><img src="http://slycoder.files.wordpress.com/2010/03/jjplot_test008.png?w=480&#038;h=480" alt="" title="jjplot_test008" width="480" height="480" class="aligncenter size-full wp-image-499" /></a>
<li> An example of log scales and the CCDF statistic.<br />
<code><br />
df &lt;- data.frame(x=rlnorm(1000,2,2.5))<br />
jjplot(x, data = df, jjplot.ccdf(density=TRUE), jjplot.point(), log=&#39;xy&#39;)<br />
</code><br />
<a href="http://slycoder.files.wordpress.com/2010/03/jjplot_test009.png"><img src="http://slycoder.files.wordpress.com/2010/03/jjplot_test009.png?w=480&#038;h=480" alt="" title="jjplot_test009" width="480" height="480" class="aligncenter size-full wp-image-500" /></a>
</ul>
<p>Lots more demos and documentation are <a href="http://code.google.com/p/jjplot/">here</a>.  To install visit <a href="http://jjplot.googlecode.com/files/jjplot_1.0.tar.gz">http://jjplot.googlecode.com/files/jjplot_1.0.tar.gz</a> and install the downloaded package using<br />
<code><br />
R CMD INSTALL jjplot_1.0.tar.gz<br />
</code><br />
We&#8217;re eager to hear your feedback!</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/slycoder.wordpress.com/496/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/slycoder.wordpress.com/496/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/slycoder.wordpress.com/496/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/slycoder.wordpress.com/496/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/slycoder.wordpress.com/496/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/slycoder.wordpress.com/496/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/slycoder.wordpress.com/496/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/slycoder.wordpress.com/496/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/slycoder.wordpress.com/496/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/slycoder.wordpress.com/496/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/slycoder.wordpress.com/496/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/slycoder.wordpress.com/496/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/slycoder.wordpress.com/496/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/slycoder.wordpress.com/496/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=496&subd=slycoder&ref=&feed=1" />]]></content:encoded>
			<wfw:commentRss>http://pleasescoopme.com/2010/03/07/jjplot-yet-another-plotting-library-for-r/feed/</wfw:commentRss>
		<slash:comments>6</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/472fd1b0a3858a5ccfb0cc27411079e9?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">slycoder</media:title>
		</media:content>

		<media:content url="http://slycoder.files.wordpress.com/2010/03/jjplot_test001.png" medium="image">
			<media:title type="html">jjplot_test001</media:title>
		</media:content>

		<media:content url="http://slycoder.files.wordpress.com/2010/03/jjplot_test003.png" medium="image">
			<media:title type="html">jjplot_test003</media:title>
		</media:content>

		<media:content url="http://slycoder.files.wordpress.com/2010/03/jjplot_test008.png" medium="image">
			<media:title type="html">jjplot_test008</media:title>
		</media:content>

		<media:content url="http://slycoder.files.wordpress.com/2010/03/jjplot_test009.png" medium="image">
			<media:title type="html">jjplot_test009</media:title>
		</media:content>
	</item>
		<item>
		<title>Axl Rose by any other name&#8230;</title>
		<link>http://pleasescoopme.com/2010/01/24/axl-rose-by-any-other-name/</link>
		<comments>http://pleasescoopme.com/2010/01/24/axl-rose-by-any-other-name/#comments</comments>
		<pubDate>Sun, 24 Jan 2010 23:22:04 +0000</pubDate>
		<dc:creator>slycoder</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://pleasescoopme.com/?p=474</guid>
		<description><![CDATA[In a post a while ago, I wondered how much info about the band one could glean just by looking at the name. I mean, shouldn&#8217;t it be obvious that a band named &#8220;Trauma&#8221; should be heavy metal? This was the genesis of a collaboration between me and Matt Hoffman. We wanted to see if [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=474&subd=slycoder&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<p>In <a href="http://pleasescoopme.com/2009/02/22/whats-in-a-name/">a post a while ago</a>, I wondered how much info about the band one could glean just by looking at the name.  I mean, shouldn&#8217;t it be obvious that a band named &#8220;Trauma&#8221; should be heavy metal?</p>
<p>This was the genesis of a collaboration between me and <a href="http://www.cs.princeton.edu/~mdhoffma/">Matt Hoffman</a>.   We wanted to see if you could improve genre prediction using the names of the bands.  Unfortunately, neither of us had enough time to really get this project going, but I thought I&#8217;d share what results we did get in hopes that someone else will pick up the torch. </p>
<p>To start off, we need a large training set of band/genre mappings.  We opted for the DBPedia Infobox mine that you can find at <a href="http://infochimps.org/">infochimps</a>.  (For those who don&#8217;t know, they&#8217;ve done some awesome data mining to grab all the structured info from Wikipedia infoboxes).  I did some cleaning up and have put up the list of <a href="https://docs.google.com/a/topicmodels.net/leaf?id=0B_m9_LMHnK7dNmQ3ODk3OTYtNjExYi00N2JlLWE2NGItYTk5ZDU1M2EwYjg2&amp;sort=name&amp;layout=list&amp;num=50">artists</a> and <a href="https://docs.google.com/a/topicmodels.net/leaf?id=0B_m9_LMHnK7dMDg1ZmFjM2QtNWI4OC00YTM2LTkwMjQtYmZlOGYxOTdjMjJm&amp;sort=name&amp;layout=list&amp;num=50">genres</a> (the artist in each line of the first file is associated with the genres on the corresponding line of the second file).</p>
<p>You might have noticed that Wikipedia is pretty crazy when it comes to genre definitions (because god forbid we confuse Melodic Death Metal and Power Metal).  This craziness makes it hard to map the artists to any canonicalized genre set (such as CAL-500).  I tried a bunch of techniques to do this canonicalization (including doing my own crawl of Wikipedia with all sorts of heuristics).  None of it worked very well for mapping genres to a canonicalized set, but it did let me make a <a href='http://slycoder.files.wordpress.com/2010/01/meow.pdf'>really cool graph of connections between genres</a>.  Eventually, we came to the conclusion that we needed human judgments.  We got mechanical turkers to label Wikipedia genres with CAL-500 genres.  Those results are <a href="https://spreadsheets.google.com/ccc?key=0Avm9_LMHnK7ddGJDS18tWUJPbTkwY2h1Y3BBT3R5bGc&amp;hl=fr">here.</a>  </p>
<p>With that training set in place, I decided to explore the data to see if there truly were correlations between substrings of artist names and genres.   The plot below shows the prevalence in each genre of artists containing &#8220;death&#8221; (red) or &#8220;boyz&#8221; (blue) in their name.   The green dots show the overall distribution of genres among artists in Wikipedia.  </p>
<p><a href="http://slycoder.files.wordpress.com/2010/01/genre_plot.png"><img src="http://slycoder.files.wordpress.com/2010/01/genre_plot.png?w=500&#038;h=500" alt="" title="genre_plot" width="500" height="500" class="aligncenter size-full wp-image-479" /></a></p>
<p>The graph shows that bands containing &#8220;death&#8221; in their name are much more likely to be Rock, Metal/Hard Rock, or Alternative.  Conversely, they are less likely to be Jazz, Hip-Hop, or Soul.  In contrast, bands containing &#8220;boyz&#8221; in their name are overwhelmingly Hip-Hop.  This confirmed my intuition and seemed promising to me, so we went ahead and developed a classifier for the CAL-500 data set.  The techniques we tried were:</p>
<ul>
<li><b>names (corrLDA)</b>- the <a href="http://www.cs.princeton.edu/~blei/papers/BleiLafferty2006.pdf">correlated topic model</a> fit to the Wikipedia data.  Predictions use only names.
<li><b>names (NB)</b> &#8211; naive Bayes fit to the Wikipedia data.  Predictions use only names.
<li><b>names (LR)</b> &#8211; logistic regression fit to the Wikipedia data.  Predictions use only names.
<li><b>baseline</b> &#8211; Predictions use the baseline frequency of genres on Wikipedia.  Predictions do not use any information about the instances.
<li><b>svm</b> &#8211; SVM fit using MFCC features.  Predictions use only audio.
<li><b>svm + names (corrLDA)</b> &#8211; SVM fit using MFCC features plus the results of names (corrLDA).  Predictions use both names and audio.
<li><b>svm + names (NB)</b>- SVM fit using MFCC features plus the results of names (NB).  Predictions use both names and audio.
<li><b>svm + names (LR)</b>- SVM fit using MFCC features plus the results of names (LR).  Predictions use both names and audio.
</ul>
<p>The plot below shows the precision-recall for each of these techniques.   As you can see, it&#8217;s not very promising.  The SVM will outclass any technique which uses the name alone; otherwise all of the name techniques look about the same.  It looks like we might get a small bump by combining SVM with names (LR) but it&#8217;s hard to tell.<br />
<a href="http://slycoder.files.wordpress.com/2010/01/pr-all.png"><img src="http://slycoder.files.wordpress.com/2010/01/pr-all.png?w=500&#038;h=350" alt="" title="pr.all" width="500" height="350" class="aligncenter size-full wp-image-480" /></a><br />
But precision-recall may not be the right metric.  After all, pop and rock are so frequent that you will probably predict pop for every single item in the test set before you even make any other prediction.  Something which is perhaps more meaningful is to look at the rank of the correct labels on a per-test-instance level; the lower the rank, the better the model is at making predictions.   Boxplots of the ranks are given below.<br />
<a href="http://slycoder.files.wordpress.com/2010/01/rank-all.png"><img src="http://slycoder.files.wordpress.com/2010/01/rank-all.png?w=500&#038;h=350" alt="" title="rank.all" width="500" height="350" class="aligncenter size-full wp-image-481" /></a><br />
We see slightly different patterns when we look at the ranks.   Without using any audio data, the naive Bayes technique performs best and manages to get a non-trivial bump beyond the baseline.   When audio is included, the names add something, but not much.  Interestingly, the names (LR) technique which looked like it might help us at precision-recall actually does a bit worse when you look at the rank.   On the other hand, SVM + names (corrLDA) has the same median as SVM, but manages to do a better job at some of the difficult-to-predict cases, leading to a smaller interquartile range. </p>
<p>In sum, names give us something &#8212; unfortunately, it&#8217;s not a whole lot.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/slycoder.wordpress.com/474/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/slycoder.wordpress.com/474/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/slycoder.wordpress.com/474/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/slycoder.wordpress.com/474/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/slycoder.wordpress.com/474/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/slycoder.wordpress.com/474/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/slycoder.wordpress.com/474/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/slycoder.wordpress.com/474/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/slycoder.wordpress.com/474/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/slycoder.wordpress.com/474/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/slycoder.wordpress.com/474/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/slycoder.wordpress.com/474/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/slycoder.wordpress.com/474/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/slycoder.wordpress.com/474/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=474&subd=slycoder&ref=&feed=1" />]]></content:encoded>
			<wfw:commentRss>http://pleasescoopme.com/2010/01/24/axl-rose-by-any-other-name/feed/</wfw:commentRss>
		<slash:comments>5</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/472fd1b0a3858a5ccfb0cc27411079e9?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">slycoder</media:title>
		</media:content>

		<media:content url="http://slycoder.files.wordpress.com/2010/01/genre_plot.png" medium="image">
			<media:title type="html">genre_plot</media:title>
		</media:content>

		<media:content url="http://slycoder.files.wordpress.com/2010/01/pr-all.png" medium="image">
			<media:title type="html">pr.all</media:title>
		</media:content>

		<media:content url="http://slycoder.files.wordpress.com/2010/01/rank-all.png" medium="image">
			<media:title type="html">rank.all</media:title>
		</media:content>
	</item>
		<item>
		<title>The cost of a sample</title>
		<link>http://pleasescoopme.com/2010/01/23/the-cost-of-a-sample/</link>
		<comments>http://pleasescoopme.com/2010/01/23/the-cost-of-a-sample/#comments</comments>
		<pubDate>Sat, 23 Jan 2010 23:03:49 +0000</pubDate>
		<dc:creator>slycoder</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://pleasescoopme.com/?p=441</guid>
		<description><![CDATA[I once heard it on good authority that Gelman says you usually don&#8217;t need more than 12 samples. Well, as a result of a discussion with Sam Gershman (sorry Sam for not answering the actual question you asked!), I wondered if that was true; that is, if under reasonable assumptions it might be better to [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=441&subd=slycoder&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<p>I once heard it on <a href="http://www.cfa.harvard.edu/~kmandel/">good authority</a> that <a href="http://www.stat.columbia.edu/~gelman/blog/">Gelman</a> says you usually don&#8217;t need more than 12 samples.  Well, as a result of a discussion with <a href="http://www.princeton.edu/~sjgershm/">Sam Gershman</a> (sorry Sam for not answering the actual question you asked!), I wondered if that was true; that is, if under reasonable assumptions it might be better to take a small number of samples.  Caveat: there&#8217;s probably lots of work on this already, but where would the fun be in that?</p>
<p>Ok, let&#8217;s assume that your goal is to estimate <img src='http://l.wordpress.com/latex.php?latex=%5Cmathbb%7BE%7D_%7Bz+%5Csim+p%28z+%7C+x%29%7D%5Bf%28z%29%5D&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='\mathbb{E}_{z \sim p(z | x)}[f(z)]' title='\mathbb{E}_{z \sim p(z | x)}[f(z)]' class='latex' />, where <img src='http://l.wordpress.com/latex.php?latex=p%28z+%7C+x%29&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='p(z | x)' title='p(z | x)' class='latex' /> represents some distribution on hidden variables over which you are trying to compute a function, <img src='http://l.wordpress.com/latex.php?latex=f&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='f' title='f' class='latex' />.   For the usual reasons, it&#8217;s intractable to compute this exactly, so you&#8217;re going to use a sampler.  Let&#8217;s assume</p>
<ul>
<li><b>that your sampler has mixed and that you&#8217;re getting independent samples</b> (that condition alone should give you fair warning that what I&#8217;m about to say is of little practical value);
<li><img src='http://l.wordpress.com/latex.php?latex=f&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='f' title='f' class='latex' /> is bounded (say between 0 and 1);
<li> to obtain <img src='http://l.wordpress.com/latex.php?latex=n&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='n' title='n' class='latex' /> samples from the sampler costs some amount, say <img src='http://l.wordpress.com/latex.php?latex=R%28n%29&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='R(n)' title='R(n)' class='latex' />.
</ul>
<p>More samples are usually better, because they&#8217;ll give you a better representation of the true distribution, i.e. <img src='http://l.wordpress.com/latex.php?latex=%5Cmathbb%7BE%7D_%7Bz+%5Csim+%5Chat%7Bp_n%7D%28z+%7C+x%29%7D%5Bf%28z%29%5D+%5Crightarrow+%5Cmathbb%7BE%7D_%7Bz+%5Csim+p%28z+%7C+x%29%7D%5Bf%28z%29%5D&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='\mathbb{E}_{z \sim \hat{p_n}(z | x)}[f(z)] \rightarrow \mathbb{E}_{z \sim p(z | x)}[f(z)]' title='\mathbb{E}_{z \sim \hat{p_n}(z | x)}[f(z)] \rightarrow \mathbb{E}_{z \sim p(z | x)}[f(z)]' class='latex' />, where <img src='http://l.wordpress.com/latex.php?latex=%5Chat%7Bp_n%7D&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='\hat{p_n}' title='\hat{p_n}' class='latex' /> is the distribution obtained by using <img src='http://l.wordpress.com/latex.php?latex=n&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='n' title='n' class='latex' /> samples.  Unfortunately, more samples come at a cost here, so you don&#8217;t want too many.  How should you tradeoff then?</p>
<p>We can define a loss by <img src='http://l.wordpress.com/latex.php?latex=%5Cell+%3D+R%28n%29+%2B+%7C%5Cmathbb%7BE%7D_%7Bz+%5Csim+%5Chat%7Bp_n%7D%28z+%7C+x%29%7D%5Bf%28z%29%5D+-+%5Cmathbb%7BE%7D_%7Bz+%5Csim+p%28z+%7C+x%29%7D%5Bf%28z%29%5D%7C&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='\ell = R(n) + |\mathbb{E}_{z \sim \hat{p_n}(z | x)}[f(z)] - \mathbb{E}_{z \sim p(z | x)}[f(z)]|' title='\ell = R(n) + |\mathbb{E}_{z \sim \hat{p_n}(z | x)}[f(z)] - \mathbb{E}_{z \sim p(z | x)}[f(z)]|' class='latex' />, that is, how far off our sampled estimate is from the truth, plus the cost of obtaining those samples.  Using Hoeffding, we can bound the loss <img src='http://l.wordpress.com/latex.php?latex=%5Cell+%3C+R%28n%29+%2B+%5Cepsilon&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='\ell &lt; R(n) + \epsilon' title='\ell &lt; R(n) + \epsilon' class='latex' /> with probability <img src='http://l.wordpress.com/latex.php?latex=1+-+2+%5Cexp%28+-2+n+%5Cepsilon%5E2%29&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='1 - 2 \exp( -2 n \epsilon^2)' title='1 - 2 \exp( -2 n \epsilon^2)' class='latex' />.  This expression gives you something to think about when you&#39;re trying to decide how many samples to take &#8212; more samples loosen the bound but increase its probability.  </p>
<p>If your cost is linear, <img src='http://l.wordpress.com/latex.php?latex=R%28n%29+%3D+a+n&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='R(n) = a n' title='R(n) = a n' class='latex' />, you might want to choose<br />
something like <img src='http://l.wordpress.com/latex.php?latex=n+%3D+%5Cfrac%7B%5Cepsilon%7D%7Ba%7D&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='n = \frac{\epsilon}{a}' title='n = \frac{\epsilon}{a}' class='latex' />, which gives you a loss of <img src='http://l.wordpress.com/latex.php?latex=%5Cell+%3C+2+%5Cepsilon&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='\ell &lt; 2 \epsilon' title='\ell &lt; 2 \epsilon' class='latex' /> with probability <img src='http://l.wordpress.com/latex.php?latex=1+-+2+%5Cexp%28-2+%5Cepsilon%5E3+%2F+a%29&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='1 - 2 \exp(-2 \epsilon^3 / a)' title='1 - 2 \exp(-2 \epsilon^3 / a)' class='latex' />.  </p>
<p>The plot below shows what might happen if you make such a choice.  Here, I&#39;ve let the posterior be an equiprobable binomial distribution.  The function I&#39;m computing is the identity <img src='http://l.wordpress.com/latex.php?latex=f%28z%29+%3D+z&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='f(z) = z' title='f(z) = z' class='latex' />.  The curves show the loss, <img src='http://l.wordpress.com/latex.php?latex=%5Cell&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='\ell' title='\ell' class='latex' /> for various  choices of the cost parameter <img src='http://l.wordpress.com/latex.php?latex=a&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='a' title='a' class='latex' /> as a function of the number of samples.   The dots show the chosen values of <img src='http://l.wordpress.com/latex.php?latex=n&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='n' title='n' class='latex' /> for each value of <img src='http://l.wordpress.com/latex.php?latex=a&#038;bg=ffffff&#038;fg=444444&#038;s=0' alt='a' title='a' class='latex' />; the horizontal lines show the 80% loss bound for these choices.</p>
<p><a href="http://slycoder.files.wordpress.com/2010/01/costly-sampling1.png"><img src="http://slycoder.files.wordpress.com/2010/01/costly-sampling1.png?w=500&#038;h=285" alt="" title="The cost of sampling" width="500" height="285" class="aligncenter size-full wp-image-442" /></a></p>
<p>Turns out for some reasonable values, you really should stick to about 12 samples.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/slycoder.wordpress.com/441/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/slycoder.wordpress.com/441/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/slycoder.wordpress.com/441/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/slycoder.wordpress.com/441/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/slycoder.wordpress.com/441/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/slycoder.wordpress.com/441/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/slycoder.wordpress.com/441/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/slycoder.wordpress.com/441/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/slycoder.wordpress.com/441/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/slycoder.wordpress.com/441/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/slycoder.wordpress.com/441/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/slycoder.wordpress.com/441/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/slycoder.wordpress.com/441/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/slycoder.wordpress.com/441/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pleasescoopme.com&blog=5562246&post=441&subd=slycoder&ref=&feed=1" />]]></content:encoded>
			<wfw:commentRss>http://pleasescoopme.com/2010/01/23/the-cost-of-a-sample/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/472fd1b0a3858a5ccfb0cc27411079e9?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">slycoder</media:title>
		</media:content>

		<media:content url="http://slycoder.files.wordpress.com/2010/01/costly-sampling1.png" medium="image">
			<media:title type="html">The cost of sampling</media:title>
		</media:content>
	</item>
	</channel>
</rss>